rohelab / fastrg Goto Github PK

View Code? Open in Web Editor NEW

5.0 3.0 0.0 24.59 MB

Sample Generalized Random Dot Product Graphs in Linear Time

Home Page: https://rohelab.github.io/fastRG/

License: Other

R 100.00%

graph-sampling latent-factors adjacency-matrix

fastrg's Issues

Make sure nodes don't go missing as in the following

latent <- fastRG::sbm(
  n = 10000,
  pi = dbetabinom(0:9,9,.22,.18),
  B = diag(10),
  expected_degree = 50,
  sort_nodes = T
)

graph2 <- sample_tidygraph(latent)

graph2 %>%
  activate(nodes) %>%
  mutate(
    block = latent$z
  ) %>% as_tibble() -> nodes

Enforce identifying constraints for SBMs

Add node contextualized stochastic blockmodel

From Binkiewicz and Zhang papers

Test directedness of sampled graphs

Document singular value scaling for undirected graphs

Scratch code

library(fastRG)
library(tidyverse)

n <- 200

k <- 5
B <- matrix(0.01, nrow = k, ncol = k)
diag(B) <- 0.8

pi <- rep(1 / k, k)


m <- fastRG::dcsbm(
  theta = stats::runif(n, min = 1, max = 3),
  B = B,
  pi = pi
)



EA <- m$X %*% m$S %*% t(m$X)
pop_s <- svds(EA, k)
pop_s$d

sum(EA)

A <- sample_sparse(m)
expected_edges(m)
sum(triu(A))

s <- svds(m)
s$d

expected_edges(m)
sum(triu(sample_sparse(m)))

once <- function() {
  sqrt(svds(sample_sparse(m), k)$d)
}

num_reps <- 50
samp <- do.call(rbind, map(1:num_reps, ~once()))

colnames(samp) <- paste0("d", 1:k)

true <- tibble(
  value = sqrt(s$d * 2),
  space = paste0("d", 1:k)
)

as_tibble(samp) |> 
  pivot_longer(
    cols = contains("d"),
    names_to = "space"
  ) |> 
  ggplot(aes(value)) +
  geom_histogram() +
  geom_vline(data = true, mapping = aes(xintercept = value)) +
  scale_fill_viridis_d() +
  facet_grid(
    rows = vars(space),
    scales = "free_x"
  ) +
  theme_minimal()

fastRG::sbm without setting expected_degree is doubling the number of edges that should be there.

library(fastRG)
library(magrittr)

n = 1000
pop = n/2
a = .1
b = .05
B = matrix(c(a,b,b,a), nrow = 2)
b_model = fastRG::sbm(n = n, k =2, 
                          B = B,  edge_distribution = "bernoulli")

b_model %>% 
  sample_sparse(poisson_edges = F) %>% 
  rowSums %>% mean

pop*a + pop*b  # this should be average degree

Dividing B by two fixes the problem:

b_model = fastRG::sbm(n = n, k =2, 
                          B = B/2,  edge_distribution = "bernoulli")

b_model %>% 
  sample_sparse(poisson_edges = F) %>% 
  rowSums %>% mean

Perhaps we are symmetrizing the edges at some point... this would double the number of edges.

Bernoulli graphs can have p>1

Bernoulli probabilities can exceed one... because we simulate from poisson and threshold anything above 1.

So, I think this error shouldn't be there.

> b_model = fastRG::sbm(n = 100, k =2, 
+                       B = matrix(c(.8,.4,.4,.8), nrow = 2),  edge_distribution = "bernoulli")
Error: Elements of `B` must be not exceed 1 for bernoulli SBMs.

Improve population svds/eigs/ASE documentation

Add blockmodel vignette once requisite models have been implemented

---
title: "extended-blockmodel-examples"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{extended-blockmodel-examples}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)

library(fastRG)

Some blockmodel examples

WARNING: content below here is rough!

K <- 10
n <- 500
pi <- rexp(K) + 1
pi <- pi / sum(pi) * 3
B <- matrix(rexp(K^2) + 1, nrow = K)
diag(B) <- diag(B) + mean(B) * K
theta <- rexp(n)

dcsbm <- dcsbm(theta = theta, pi = pi, B = B, expected_degree = 50)
A <- sample_sparse(dcsbm)
A
mean(rowSums(A))  # average degree

If we remove multiple edges, the expected_degree parameter is not trustworthy:

bernoulli_dcsbm <- dcsbm(theta = theta, pi = pi, B = B, expected_degree = 50)
A <- sample_sparse(bernoulli_dcsbm, poisson_edges = FALSE)
mean(rowSums(A))

but it is a good upper bound when the graph is sparse:

n <- 10000
A <- dcsbm(theta = rexp(n), pi = pi, B = B, expected_degree = 50, poisson_edges = FALSE)
mean(rowSums(A))

This draws a 100 x 100 adjacency matrix from each model. Each image might take around 5 seconds to render.

# helper to plot adjacency matrices

plot_matrix <- function(A) {
  image(as.matrix(A), col = grey(seq(1, 0, len = 20)))
}

K <- 10
n <- 100
pi <- rexp(K) + 1
pi <- pi / sum(pi) * 3
B <- matrix(rexp(K^2) + 1, nrow = K)
diag(B) <- diag(B) + mean(B) * K
theta <- rexp(n)
A <- sample_sparse(dcsbm(theta = theta, pi = pi, B = B, expected_degree = 50))

plot_matrix(A[, n:1])

K <- 2
n <- 100
alpha <- c(1, 1) / 5
B <- diag(c(1, 1))
theta <- n
A <- dc_mixed(theta, alpha, B, expected_degree = 50, sort_nodes = TRUE)

plot_matrix(A[, theta:1] / max(A))

n <- 100
K <- 2
pi <- c(.7, .7)
B <- diag(c(1, 1))
theta <- n
A <- dc_overlapping(theta, pi, B, expected_degree = 50, sort_nodes = TRUE)

plot_matrix(t(A[, 1:n] / max(A)))

K <- 10
n <- 100
pi <- rexp(K) + 1
pi <- pi / sum(pi)
B <- matrix(rexp(K^2), nrow = K)
B <- B / (3 * max(B))
diag(B) <- diag(B) + mean(B) * K
A <- fastRG::sbm(n, pi, B, sort_nodes = TRUE)

plot_matrix(A)

Next sample a DC-SBM with 10,000 nodes. Then compute and plot the leading eigenspace.

library(RSpectra)

K <- 10
n <- 10000
pi <- rexp(K) + 1
pi <- pi / sum(pi)
pi <- -sort(-pi)
B <- matrix(rexp(K^2) + 1, nrow = K)
diag(B) <- diag(B) + mean(B) * K
A <- dcsbm(rgamma(n, shape = 2, scale = .4), pi, B, expected_degree = 20, simple = TRUE)
mean(rowSums(A))

K <- 10
n <- 10000
pi <- rexp(K) + 1
pi <- pi / sum(pi)
pi <- -sort(-pi)
B <- matrix(rexp(K^2) + 1, nrow = K)
diag(B) <- diag(B) + mean(B) * K
A <- dcsbm(rgamma(n, shape = 2, scale = .4), pi, B, expected_degree = 20, simple = TRUE, sort_nodes = TRUE)
mean(rowSums(A))

# leading eigen of regularized Laplacian with tau = 1
D <- Diagonal(n, 1 / sqrt(rowSums(A) + 1))
ei <- eigs_sym(D %*% A %*% D, 10)

# normalize the rows of X:
X <- t(apply(ei$vectors[, 1:K], 1, function(x) return(x / sqrt(sum(x^2) + 1 / n))))

par(mfrow = c(5, 1), mar = c(1, 2, 2, 2), xaxt = "n", yaxt = "n")

# plot 1000 elements of the leading eigenvectors:
s <- sort(sample(n, 1000))

for (i in 1:5) {
  plot(X[s, i], pch = ".")
}

# This samples a 1M node graph.
# Depending on the computer, sampling the graph should take between 10 and 30 seconds
#   Then, taking the eigendecomposition of the regularized graph laplacian should take between 1 and 3 minutes
# The resulting adjacency matrix is a bit larger than 100MB.
# The leading eigenvectors of A are highly localized
K <- 3
n <- 1000000
pi <- rexp(K) + 1
pi <- pi / sum(pi)
pi <- -sort(-pi)
B <- matrix(rexp(K^2) + 1, nrow = K)
diag(B) <- diag(B) + mean(B) * K
A <- dcsbm(rgamma(n, shape = 2, scale = .4), pi, B, expected_degree = 10, sort_nodes = TRUE)
D <- Diagonal(n, 1 / sqrt(rowSums(A) + 10))
L <- D %*% A %*% D
ei <- eigs_sym(L, 4)

s <- sort(sample(n, 10000))
X <- t(apply(ei$vectors[, 1:K], 1, function(x) return(x / sqrt(sum(x^2) + 1 / n))))
plot(X[s, 3]) # highly localized eigenvectors

To sample from a degree corrected and node contextualized graph...

n <- 10000 # number of nodes
d <- 1000 # number of features
K <- 5 # number of blocks

# Here are the parameters for the graph:

pi <- rexp(K) + 1
pi <- pi / sum(pi) * 3
B <- matrix(rexp(K^2) + 1, nrow = K)
diag(B) <- diag(B) + mean(B) * K
theta <- rexp(n)
paraG <- dcsbm_params(theta = theta, pi = pi, B = B)

# Here are the parameters for the features:

thetaY <- rexp(d)
piFeatures <- rexp(K) + 1
piFeatures <- piFeatures / sum(piFeatures) * 3
BFeatures <- matrix(rexp(K^2) + 1, nrow = K)
diag(BFeatures) <- diag(BFeatures) + mean(BFeatures) * K

paraFeat <- dcsbm_params(theta = thetaY, pi = piFeatures, B = BFeatures)

# the node "degrees" in the features, should be related to their degrees in the graph.
X <- paraG$X
X@x <- paraG$X@x + rexp(n) # the degree parameter should be different. X@x + rexp(n) makes feature degrees correlated to degrees in graph.

# generate the graph and features
A <- fastRG(paraG$X, paraG$S, expected_degree = 10)
features <- fastRG(X, paraFeat$S, paraFeat$X, expected_degree = 20)

Use identifiable parameterization for overlapping models

tmp_index is shorter than to_index sometimes, especially in small and degenerate blockmodels

Playing around with different seeds in the degenerate SBM test reveals a variety of new and exciting issues. TL; DR: if you need to sample small graphs (or potentially SBMs with k ~ n), you may run into issues.

For some reason this errors for me when run by testthat but not interactively.

library(fastRG)

set.seed(5)

n <- 10
k <- 10
pi <- rep(1, k) / k

B <- diag(rep(0.5, k))

sbm <- sbm(n = n, pi = pi, B = B)

sample_sparse(sbm)

If this becomes a blocker for anyone, please let me know.

Add degree-corrected overlapping membership model

#' Sample a degree corrected overlapping stochastic blockmodel graph
#'
#' @param p Community membership parameter. For each node, membership in
#'   each community is consider separately. Each node has probability
#'   `p[i]` of being in the `i^th` community. `length(p)` implicitly
#'   specifies the number of communities.
#'
#' @inherit dcsbm params
#' @inheritDotParams fastRG
#' @inherit fastRG return
#'
#' @export
#' @seealso [fastRG()]
#' @family stochastic block models
#'
#' @details TODO: write the model out in detail here
#'
#'   \deqn{
#'     xi  = \theta_i * z_i, where [z_i]_j ~ bernoulli(pi_j)
#'   }
#'
#'   \deqn{
#'     \lambda_{ij} = xi' B xj
#'   }
#'
#'   probability of \eqn{i} connecting to \eqn{j}:  \eqn{1 - exp(-\lambda_{ij})}
#'
#' @examples
#'
#' set.seed(27)
#'
#' n <- 1000
#' k <- 5
#'
#' B <- matrix(runif(k * k), nrow = k, ncol = k) # mixing probabilities
#'
#' theta <- round(rlnorm(n, 2))  # degree parameter
#' p <- c(1, 2, 4, 1, 1) / 5     # community membership
#'
#' A <- dc_overlapping(theta, p, B, avg_deg = 50)
#'
dc_overlapping <- function(theta, p, B, avg_deg = NULL,
                           poisson_edges = TRUE, sort_nodes = FALSE, ...) {

  params <- dc_overlapping_params(
    theta = theta, p = p, B = B,
    avg_deg = avg_deg,
    poisson_edges = poisson_edges,
    sort_nodes = sort_nodes
  )

  # NOTE: avg_deg is null since we handled scaling B internally
  fastRG(params$X, params$S, poisson_edges = poisson_edges, avg_deg = NULL, ...)
}

#' @rdname dc_overlapping
#' @export
dc_overlapping_params <- function(theta, p, B, avg_deg = NULL,
                                  poisson_edges = TRUE, sort_nodes = FALSE) {
  if (length(theta) == 1) {
    n <- theta
    theta <- rep(1, n)
  }

  if (length(theta) > 1) {
    n <- length(theta)
  }

  K <- length(p)

  if (K != nrow(B) || ncol(B) != nrow(B)) {
    stop("Both dimensions of B must match length of `pi`.", call. = FALSE)
  }

  X <- matrix(0, nrow = n, ncol = K)

  for (i in 1:K) {
    X[, i] <- stats::rbinom(n, 1, p[i])
  }

  if (sort_nodes)
    X <- X[order(X %*% (1:K)), ]

  Theta <- Diagonal(n, theta)
  X <- Theta %*% X

  if (is.null(avg_deg)) {
    return(list(X = X, S = B, Y = X))
  }

  # scale B just like in fastRG()
  B <- B * avg_deg / expected(X, B)$degree

  if (!poisson_edges) {

    if (max(B) > 1)
      stop(
        "Expected edge values must be not exceed 1 for bernoulli graphs. ",
        "Either diminish `avg_deg` or set `poisson_edges = TRUE`.",
        call. = FALSE
      )

    # we're still sampling from a Poisson distribution, but B has been
    # specified as Bernoulli edges probabilities. convert these edges
    # probabilities such that we can feed them into a Poisson sampling
    # procedure

    B <- -log(1 - B)
  }

  # TODO: return latent communities
  list(X = X, S = B, Y = X)
}

Test allow_self_loops

Add Chung-Lu graphs

#' Sample a Chung-Lu graph
#'
#' @param theta A vector containing expected degree of each node. That is,
#'   the resulting graph will have `length(theta)` nodes. Note that as
#'   `sum(theta)` approaches `length(theta)`, you will start to sample
#'   very dense graphs.
#'
#' @inheritDotParams fastRG
#' @inherit fastRG return
#'
#' @return Never returns Poisson edges.
#'
#' @export
#'
#' @seealso [fastRG()]
#' @family bernoulli graphs
#'
#' @examples
#'
#' set.seed(27)
#'
#' theta <- c(0, 1, 0, 2, 0)
#'
#' A <- chung_lu(theta)
#'
#' # out degree
#' rowSums(A)
#'
#' # get the random dot product model parameters
#' params <- chung_lu_params(theta)
#'
#' expected(params$X, params$S)
#'
chung_lu <- function(theta, ...) {
  params <- chung_lu_params(theta)
  fastRG(params$X, params$S, poisson_edges = FALSE, ...)
}

#' @rdname chung_lu
#' @export
chung_lu_params <- function(theta) {

  if (!is.vector(theta)) {
    stop("`theta` must be a vector.", call. = FALSE)
  }

  if (any(theta < 0)) {
    stop("Elements of `theta` must be non-negative.", call. = FALSE)
  }

  n <- length(theta)
  X <- matrix(theta, nrow = n, ncol = 1)
  S <- matrix(1, nrow = 1, ncol = 1)

  list(X = X, S = S, Y = X)
}

sbm() function not using possion_edges and allow_self_loops parameters

Looks like it's not propagating the provided parameters to the dcsbm function

fastRG/R/undirected_sbm.R

Line 105 in 534b4ca

poisson_edges = TRUE,

fastRG/R/undirected_sbm.R

Line 106 in 534b4ca

allow_self_loops = TRUE,

Recover block memberships with dcsbm?

Hi! Thanks for your work on the package!
I was hoping to use the directed_dcsbm function to generate simple degree-corrected SBMs. I've run into some issues surrounding the block memberships.

At very least, I would like to be able to recover each node's membership as we move from the factor model to the sampled network. I know that the factor model given as input has parameters z_in and z_out, but as the samples that result from that (I'm using sample_igraph) don't necessarily have the same number of nodes, there can't be a direct mapping of those vectors to what we get in any graph drawing from that.

I'm also specifying a vector of block probabilities, but their ordering don't seem to carry through to the factor model. I'm not sure what that implies for the use with the block matrix. Code to demonstrate that while the blocks are sized roughly in agreement with what's given to pi_in or pi_out, they don't appear in the same order in z_in or z_out:

set.seed(32)

bm <- as.matrix(cbind(
  c(.3, .005, .005, .005, .005),
  c(.002, .3, .005, .005, .005),
  c(.002, .01, .3, .005, .005),
  c(.002, .01, .005, .2, .005),
  c(.002, .005, .005, .005, .2)
))

pi <- c(5, 50, 20, 25, 100)

sbm <- fastRG::directed_dcsbm(n = 200, 
                              B = bm,
                              pi_in = pi,
                              pi_out = pi,
                              expected_out_degree = 3,
                              allow_self_loops = FALSE,
                              sort_nodes = TRUE)

net <- fastRG::sample_igraph(sbm)

## the order of the blocks as given for the block probabilities don't align with the order of the block memberships in the factor model:
pi
table(sbm$z_in)

All of this means that I'm unsure of the block memberships of each node, and whether it's appropriately aligning with the block matrix given in input. (In a perfect world, the igraph object that's created would have vertex attributes that are the block memberships).

Ideally, I'd yet further be able to specify that z_in == z_out. Even if I specify both p_in and p_out, they don't align (clear with table(sbm$z_in, sbm$z_out)).

Let me know if these issues are unclear! Thanks again!

Compare SBM sampling with igraph in README

igraph: SBM sampling

Time complexity: O(|V|+|E|+K^2), where |V| is the number of

vertices, |E| is the number of edges, and K is the number of

groups.

Time complexity: O(nnm), where n is the number of vertices,

and m is the length of the latent vectors.

Blockmodels should graciously handle k = 1

library(fastRG)
#> Loading required package: Matrix
dcsbm(n = 500, k = 1, expected_density = 0.8)
#> Generating random degree heterogeneity parameters `theta` from a LogNormal(2, 1) distribution. This distribution may change in the future. Explicitly set `theta` for reproducible results.
#> Generating random mixing matrix `B` with independent Uniform(0, 1) entries. This distribution may change in the future. Explicitly set `B` for reproducible results.
#> Error in B[, order(pi)]: incorrect number of dimensions

^{Created on 2021-09-23 by the reprex package (v2.0.1.9000)}

Be consistent with Poisson graphs about using weighted graphs versus multigraphs in `igraph` and `tidygraph` samplers

Release fastRG 0.3.1

Prepare for release:

Submit to CRAN:

usethis::use_version('patch')
devtools::submit_cran()
Approve email

Wait for CRAN...

Test multi-graphness of igraph and tidygraph representations?

Add co-blockmodel

#' Sample a directed stochastic blockmodel
#'
#' @param n Number of nodes in graph.
#'
#' @param pi_in Block sampling proportions for incoming blocks. Must be
#'   positive, but do not need to sum to one, as they will be
#'   normalized internally. Note that `length(pi_in)` implicitly
#'   specifies the number of incoming blocks. The number of
#'   incoming and outgoing blocks need not be the same.
#'
#' @param pi_out Block sampling proportions for outgoing blocks. Must be
#'   positive, but do not need to sum to one, as they will be
#'   normalized internally. Note that `length(pi_out)` implicitly
#'   specifies the number of outgoing blocks. The number of
#'   incoming and outgoing blocks need not be the same.
#'
#' @param B A `length(pi_in)` by `length(pi_out)` matrix of block connection
#'   probabilities. `B[i, j]` contains the probability that a node in
#'   community `i` links to a node in community `j`. Does not need to
#'   be symmetric.
#'
#' @param sort_nodes Logical indicating whether or not to sort the nodes
#'   so that they are grouped by block. This incurs the the cost of a
#'   radix sort, which is linear in the number of nodes in the graph.
#'   Defaults to `FALSE`. Useful for plotting.
#'
#' @inheritDotParams fastRG
#'
#' @inherit fastRG params return
#'
#' @export
#' @seealso [fastRG()]
#' @family stochastic block models
#' @family directed graphs
#'
#' @details TODO: describe the generative model.
#'
#' @examples
#'
#' set.seed(27)
#'
#' n <- 1000
#' pi_in <- c(0.3, 0.7)
#' pi_out <- c(0.2, 0.6, 0.2)
#'
#' B <- rbind(
#'   c(0.7, 0.1, 0.3),
#'   c(0.1, 0.6, 0.2)
#' )
#'
#' A <- disbm(n, pi_in, pi_out, B)
#'
disbm <- function(n, pi_in, pi_out, B, avg_deg = NULL, poisson_edges = TRUE,
                  sort_nodes = FALSE, ...) {

  params <- disbm_params(
    n = n, pi_in = pi_in, pi_out = pi_out, B = B, avg_deg = avg_deg,
    poisson_edges = poisson_edges, sort_nodes = sort_nodes
  )

  # NOTE: avg_deg is null since we handled scaling B internally
  fastRG(params$X, params$S, params$Y, poisson_edges = poisson_edges,
         avg_deg = NULL, ...)
}

#' @export
disbm_params <- function(n, pi_in, pi_out, B, avg_deg = NULL,
                         poisson_edges = TRUE, sort_nodes = FALSE, ...) {

  if (any(pi_in < 0)) {
    stop("Elements of `pi_in` must be non-negative.", call. = FALSE)
  }

  if (any(pi_out < 0)) {
    stop("Elements of `pi_out` must be non-negative.", call. = FALSE)
  }

  pi_in <- pi_in / sum(pi_in)
  K_in <- length(pi_in)

  pi_out <- pi_out / sum(pi_out)
  K_out <- length(pi_out)

  if (K_in != nrow(B) || K_out != ncol(B)) {
    stop(
      "`B` must be a `length(pi_in)` x `length(pi_out)` matrix.",
      call. = FALSE
    )
  }

  # incoming block memberships
  z_in <- sample(K_in, n, replace = TRUE, prob = pi_in)
  z_out <- sample(K_out, n, replace = TRUE, prob = pi_out)

  z_in <- factor(z_in, levels = 1:K_in)
  z_out <- factor(z_out, levels = 1:K_out)

  # sort(z) orders the nodes so that all nodes in the first
  # block are together, nodes in the second block are all together, etc
  if (sort_nodes) {
    z_in <- sort(z_in)
    z_out <- sort(z_out)
  }

  X <- sparse.model.matrix(~ z_in + 0)
  Y <- sparse.model.matrix(~ z_out + 0)

  # now we handle avg_deg specially to make sure we don't get a matrix B
  # with probabilities outside of [0, 1]

  if (is.null(avg_deg)) {
    return(list(X = X, S = B, Y = Y))
  }

  # scale B just like in fastRG()
  B <- B * avg_deg / expected(X, B, Y)$degree

  if (!poisson_edges) {

    if (max(B) > 1)
      stop(
        "Expected edge values must be not exceed 1 for bernoulli graphs. ",
        "Either diminish `avg_deg` or set `poisson_edges = TRUE`.",
        call. = FALSE
      )

    # we're still sampling from a Poisson distribution, but B has been
    # specified as Bernoulli edges probabilities. convert these edges
    # probabilities such that we can feed them into a Poisson sampling
    # procedure

    B <- -log(1 - B)
  }

  list(X = X, S = B, Y = Y, z_in = z_in, z_out = z_out)
}

Automatic theta_out generation either not working or not informating user in directed_dcsbm

> model <- directed_dcsbm(n = n, B = B, pi_in = pi, pi_out = pi)
Generating random degree heterogeneity parameters `theta_in` from a LogNormal(2, 1) distribution. This distribution may change in the future. Explicitly set `theta_in` for reproducible results.

Add degree corrected co-blockmodel

new_directed_dcsbm <- function(
  X, S, Y,
  theta_in,
  theta_out,
  z_in,
  z_out,
  pi_in,
  pi_out,
  sorted,
  ...,
  subclass = character()) {
  subclass <- c(subclass, "directed_dcsbm")
  dcsbm <- directed_factor_model(X, S, Y, ..., subclass = subclass)
  dcsbm$theta_in <- theta_in
  dcsbm$theta_out <- theta_out
  dcsbm$z_in <- z_in
  dcsbm$z_out <- z_out
  dcsbm$pi_in <- pi_in
  dcsbm$pi_out <- pi_out
  dcsbm$sorted <- sorted
  dcsbm
}

validate_directed_dcsbm <- function(x) {

  values <- unclass(x)

  if (!is.factor(values$z_in)) {
    stop("`z_in` must be a factor.", call. = FALSE)
  }

  if (!is.factor(values$z_out)) {
    stop("`z_out` must be a factor.", call. = FALSE)
  }

  if (length(levels(values$z_in)) != values$k1) {
    stop(
      "Number of levels of `z1` must match the incoming rank of the model.",
      call. = FALSE
    )
  }

  if (length(levels(values$z_out)) != values$k2) {
    stop(
      "Number of levels of `z_out` must match the outgoing rank of the model.",
      call. = FALSE
    )
  }

  if (length(values$z_in) != nrow(values$X)) {
    stop("There must be one element of `z_in` for each row of `X`.")
  }

  if (length(values$z_out) != nrow(values$Y)) {
    stop("There must be one element of `z_out` for each row of `Y`.")
  }

  if (!is.numeric(values$theta_in)) {
    stop("`theta_in` must be a numeric.", call. = FALSE)
  }

  if (!is.numeric(values$theta_out)) {
    stop("`theta_out` must be a numeric.", call. = FALSE)
  }

  if (any(values$theta_in < 0)) {
    stop("Elements of `theta_in` must be strictly positive.", call. = FALSE)
  }

  if (any(values$theta_out < 0)) {
    stop("Elements of `theta_out` must be strictly positive.", call. = FALSE)
  }

  if (length(values$theta_in) != nrow(values$X)) {
    stop(
      "There must be one element of `theta_in` for each row of `X`.",
      call. = FALSE
    )
  }

  if (length(values$theta_out) != nrow(values$Y)) {
    stop(
      "There must be one element of `theta_out` for each row of `Y`.",
      call. = FALSE
    )
  }

  # TODO: check dimensions of pi_in and pi_out

  x
}

#' Create a directed degree corrected stochastic blockmodel object
#'
#' To specify a degree-corrected stochastic blockmodel, you must specify
#' the degree-heterogeneity parameters (via `n_in` or `theta_in`, and
#' `n_out` or `theta_out`), the mixing matrix
#' (via `k_in` and `k_out`, or `B`), and the relative block
#' probabilites (optional, via `p_in` and `pi_out`).
#' We provide sane defaults for most of these
#' options to enable rapid exploration, or you can invest the effort
#' for more control over the model parameters. We **strongly recommend**
#' setting the `expected_in_degree`, `expected_out_degree`,
#' or `expected_density` argument
#' to avoid large memory allocations associated with
#' sampling large, dense graphs.
#'
#' @param n_in (degree heterogeneity) The number of nodes in the blockmodel.
#'   Use when you don't want to specify the degree-heterogeneity
#'   parameters `theta` by hand. When `n` is specified, `theta`
#'   is randomly generated from a `LogNormal(2, 1)` distribution.
#'   This is subject to change, and may not be reproducible.
#'   `n` defaults to `NULL`. You must specify either `n`
#'   or `theta`, but not both.
#'
#' @param n_out (degree heterogeneity) The number of nodes in the blockmodel.
#'   Use when you don't want to specify the degree-heterogeneity
#'   parameters `theta` by hand. When `n` is specified, `theta`
#'   is randomly generated from a `LogNormal(2, 1)` distribution.
#'   This is subject to change, and may not be reproducible.
#'   `n` defaults to `NULL`. You must specify either `n`
#'   or `theta`, but not both.
#'
#' @param theta_in (degree heterogeneity) A numeric vector
#'   explicitly specifying the degree heterogeneity
#'   parameters. This implicitly determines the number of nodes
#'   in the resulting graph, i.e. it will have `length(theta)` nodes.
#'   Must be positive. Setting to a vector of ones recovers
#'   a stochastic blockmodel without degree correction.
#'   Defaults to `NULL`. You must specify either `n` or `theta`,
#'   but not both.
#'
#' @param theta_out (degree heterogeneity) A numeric vector
#'   explicitly specifying the degree heterogeneity
#'   parameters. This implicitly determines the number of nodes
#'   in the resulting graph, i.e. it will have `length(theta)` nodes.
#'   Must be positive. Setting to a vector of ones recovers
#'   a stochastic blockmodel without degree correction.
#'   Defaults to `NULL`. You must specify either `n` or `theta`,
#'   but not both.
#'
#' @param k_in (mixing matrix) The number of blocks in the blockmodel.
#'   Use when you don't want to specify the mixing-matrix by hand.
#'   When `k` is specified, the elements of `B` are drawn
#'   randomly from a `Uniform(0, 1)` distribution.
#'   This is subject to change, and may not be reproducible.
#'   `k` defaults to `NULL`. You must specify either `k`
#'   or `B`, but not both.
#'
#' @param k_out (mixing matrix) The number of blocks in the blockmodel.
#'   Use when you don't want to specify the mixing-matrix by hand.
#'   When `k` is specified, the elements of `B` are drawn
#'   randomly from a `Uniform(0, 1)` distribution.
#'   This is subject to change, and may not be reproducible.
#'   `k` defaults to `NULL`. You must specify either `k`
#'   or `B`, but not both.
#'
#' @param B (mixing matrix) A `k` by `k` matrix of block connection
#'   probabilities. The probability that a node in block `i` connects
#'   to a node in community `j` is `Poisson(B[i, j])`. Must be square
#'   a square matrix. `matrix` and `Matrix` objects are both
#'   acceptable. If `B` is not symmetric, it will be
#'   symmetrized via the update `B := B + t(B)`. Defaults to `NULL`.
#'   You must specify either `k` or `B`, but not both.
#'
#' @param pi_in (relative block probabilities) Relative block
#'   probabilities. Must be positive, but do not need to sum
#'   to one, as they will be normalized internally.
#'   Must match the dimensions of `B` or `k`. Defaults to
#'   `rep(1 / k, k)`, or a balanced blocks.
#'
#' @param pi_out (relative block probabilities) Relative block
#'   probabilities. Must be positive, but do not need to sum
#'   to one, as they will be normalized internally.
#'   Must match the dimensions of `B` or `k`. Defaults to
#'   `rep(1 / k, k)`, or a balanced blocks.
#'
#' @param sort_nodes Logical indicating whether or not to sort the nodes
#'   so that they are grouped by block. Useful for plotting.
#'   Defaults to `TRUE`.
#'
#' @inheritDotParams directed_factor_model expected_in_degree expected_density
#' @inheritDotParams directed_factor_model expected_out_degree
#'
#' @return A `directed_dcsbm` S3 object, a subclass of the
#'   [directed_factor_model()] with the following additional
#'   fields:
#'
#'   - `theta_in`: A numeric vector of incoming community
#'     degree-heterogeneity parameters.
#'
#'   - `theta_out`: A numeric vector of outgoing community
#'     degree-heterogeneity parameters.
#'
#'   - `z_in`: The incoming community memberships of each node,
#'     as a [factor()]. The factor will have `k_in` levels,
#'     where `k_in` is the number of incoming
#'     communities in the stochastic blockmodel. There will not
#'     always necessarily be observed nodes in each community.
#'
#'   - `z_out`: The outgoing community memberships of each node,
#'     as a [factor()]. The factor will have `k_out` levels,
#'     where `k_out` is the number of outgoing
#'     communities in the stochastic blockmodel. There will not
#'     always necessarily be observed nodes in each community.
#'
#'   - `pi_in`: Sampling probabilities for each incoming
#'     community.
#'
#'   - `pi_out`: Sampling probabilities for each outgoing
#'     community.
#'
#'   - `sorted`: Logical indicating where nodes are arranged by
#'     block (and additionally by degree heterogeneity parameter)
#'     within each block.
#'
#' @export
#' @family stochastic block models
#' @family directed graphs
#'
#' @details
#'
#' # Generative Model
#'
#' There are two levels of randomness in a degree-corrected
#' stochastic blockmodel. First, we randomly chosen a block
#' membership for each node in the blockmodel. This is
#' handled by `dcsbm()`. Then, given these block memberships,
#' we randomly sample edges between nodes. This second
#' operation is handled by [sample_edgelist()],
#' [sample_sparse()], [sample_igraph()] and
#' [sample_tidygraph()], depending your desirable
#' graph representation.
#'
#' ## Block memberships
#'
#' Let \eqn{z_i} represent the block membership of node \eqn{i}.
#' To generate \eqn{z_i} we sample from a categorical
#' distribution (note that this is a special case of a
#' multinomial) with parameter \eqn{\pi}, such that
#' \eqn{\pi_i} represents the probability of ending up in
#' the ith block. Block memberships for each node are independent.
#'
#' ## Degree heterogeneity
#'
#' In addition to block membership, the DCSBM also allows
#' nodes to have different propensities for edge formation.
#' We represent this propensity for node \eqn{i} by a positive
#' number \eqn{\theta_i}. Typically the \eqn{\theta_i} are
#' constrained to sum to one for identifiability purposes,
#' but this doesn't really matter during sampling (i.e.
#' without the sum constraint scaling \eqn{B} and \eqn{\theta}
#' has the same effect on edge probabilities, but whether
#' \eqn{B} or \eqn{\theta} is responsible for this change
#' is uncertain).
#'
#' ## Edge formulation
#'
#' Once we know the block memberships \eqn{z} and the degree
#' heterogeneity parameters \eqn{theta}, we need one more
#' ingredient, which is the baseline intensity of connections
#' between nodes in block `i` and block `j`. Then each edge
#' \eqn{A_{i,j}} is Poisson distributed with parameter
#'
#' \deqn{
#'   \lambda[i, j] = \theta_i \cdot B_{z_i, z_j} \cdot \theta_j.
#' }{
#'   \lambda_{i, j} = \theta[i] * B[z[i], z[j]] * \theta[j].
#' }
#'
#' @examples
#'
#' set.seed(27)
#'
#' dcsbm <- directed_dcsbm(
#'   n_in = 1000,
#'   k_in = 5,
#'   k_out = 8,
#'   expected_density = 0.01
#' )
#'
#' dcsbm
#'
#' population_svd <- svds(dcsbm)
#'
directed_dcsbm <- function(
  n_in = NULL, theta_in = NULL,
  n_out = NULL, theta_out = NULL,
  k_in = NULL, k_out = NULL, B = NULL,
  ...,
  pi_in = rep(1 / k_in, k_in),
  pi_out = rep(1 / k_out, k_out),
  sort_nodes = TRUE) {

  ### in degree heterogeneity parameters

  if (is.null(n_in) && is.null(theta_in)) {
    stop("Must specify either `n_in` or `theta_in`.", call. = FALSE)
  } else if (is.null(theta_in)) {

    if (n_in < 1) {
      stop("`n_in` must be a positive integer.", call. = FALSE)
    }

    message(
      "Generating random degree heterogeneity parameters `theta_in` from a ",
      "LogNormal(2, 1) distribution. This distribution may change ",
      "in the future. Explicitly set `theta_in` for reproducible results.\n"
    )

    theta_in <- stats::rlnorm(n_in, meanlog = 2, sdlog = 1)
  } else if (is.null(n_in)) {
    n_in <- length(theta)
  }

  ### out degree heterogeneity parameters

  if (is.null(n_out) && is.null(theta_out)) {
    stop("Must specify either `n_out` or `theta_out`.", call. = FALSE)
  } else if (is.null(theta_out)) {

    if (n_out < 1) {
      stop("`n_out` must be a positive integer.", call. = FALSE)
    }

    message(
      "Generating random degree heterogeneity parameters `theta_out` from a ",
      "LogNormal(2, 1) distribution. This distribution may change ",
      "in the future. Explicitly set `theta_out` for reproducible results.\n"
    )

    theta_out <- stats::rlnorm(n_out, meanlog = 2, sdlog = 1)
  } else if (is.null(n_out)) {
    n_out <- length(theta_out)
  }

  ### mixing matrix

  # NOTE: STOPPED HERE AHHHHHHH

  if (is.null(k) && is.null(B)) {
    stop("Must specify either `k` or `B`.", call. = FALSE)
  } else if (is.null(B)) {

    if (k < 1) {
      stop("`k` must be a positive integer.", call. = FALSE)
    }

    message(
      "Generating random mixing matrix `B` with independent ",
      "Uniform(0, 1) entries. This distribution may change ",
      "in the future. Explicitly set `B` for reproducible results."
    )

    B <- Matrix(data = stats::runif(k * k), nrow = k, ncol = k)

  } else if (is.null(k)) {

    if (nrow(B) != ncol(B)) {
      stop("`B` must be a square matrix.", call. = FALSE)
    }

    k <- nrow(B)
  }

  ### block membership

  if (length(pi) != nrow(B) || length(pi) != ncol(B)) {
    stop("Length of `pi` must match dimensions of `B`.", call. = FALSE)
  }

  if (any(pi < 0)) {
    stop("All elements of `pi` must be >= 0.", call. = FALSE)
  }

  # order mixing matrix by expected group size

  B <- B[order(pi), ]
  B <- B[, order(pi)]
  pi <- sort(pi / sum(pi))

  # sample block memberships

  z <- sample(k, n, replace = TRUE, prob = pi)
  z <- factor(z, levels = 1:k, labels = paste0("block", 1:k))

  if (sort_nodes) {
    z <- sort(z)
  }

  X <- sparse.model.matrix(~z + 0)

  if (sort_nodes) {

    # sort by degree within each block
    ct <- c(0, cumsum(table(z)))

    for (i in 1:k) {
      theta[(ct[i] + 1):ct[i + 1]] <- -sort(-theta[(ct[i] + 1):ct[i + 1]])
    }
  }

  X@x <- theta

  dcsbm <- new_directed_dcsbm(
    X = X,
    S = B,
    Y = Y,
    theta_in = theta_in,
    theta_out = theta_out,
    z_in = z_in,
    z_out = z_out,
    pi_in = pi_in,
    pi_out = pi_out,
    sorted = sort_nodes,
    ...
  )

  validate_directed_dcsbm(dcsbm)
}

#' @method print directed_dcsbm
#' @export
print.directed_dcsbm <- function(x, ...) {

  cat(glue("Directed Degree-Corrected Stochastic Blockmodel\n", .trim = FALSE))
  cat(glue("-----------------------------------------------\n\n", .trim = FALSE))

  sorted <- if (x$sorted) "arranged by block" else "not arranged by block"

  cat(glue("Nodes (n): {x$n} ({sorted})\n", .trim = FALSE))
  cat(glue("Blocks (k): {x$k}\n\n", .trim = FALSE))

  cat("Traditional DCSBM parameterization:\n\n")
  cat("Block memberships (z):", dim_and_class(x$z), "\n")
  cat("Degree heterogeneity (theta):", dim_and_class(x$theta), "\n")
  cat("Block probabilities (pi):", dim_and_class(x$pi), "\n\n")

  cat("Factor model parameterization:\n\n")
  cat("X:", dim_and_class(x$X), "\n")
  cat("S:", dim_and_class(x$S), "\n\n")

  cat(glue("Expected edges: {round(expected_edges(x))}\n", .trim = FALSE))
  cat(glue("Expected degree: {round(expected_degree(x), 1)}\n", .trim = FALSE))
  cat(glue("Expected density: {round(expected_density(x), 5)}", .trim = FALSE))
}

Release fastRG 0.3.0

First release:

usethis::use_cran_comments()
Proof read Title: and Description:
Check that all exported functions have @returns and @examples
Check that Authors@R: includes a copyright holder (role 'cph')
Check licensing of included files
Review https://github.com/DavisVaughan/extrachecks

Prepare for release:

devtools::build_readme()
urlchecker::url_check()
devtools::check(remote = TRUE, manual = TRUE)
devtools::check_win_devel()
rhub::check_for_cran()
Review pkgdown reference index for, e.g., missing topics
Draft blog post

Submit to CRAN:

usethis::use_version('minor')
devtools::submit_cran()
Approve email

Wait for CRAN...

Add degree-corrected mixed membership models

#' Sample a degree corrected mixed membership stochastic blockmodel graph
#'
#' @param alpha Community membership parameter. Node memberships will be drawn
#'   from a `Dirichlet(alpha)` distribution. `length(alpha)` implicitly
#'   specifies the number of communities.
#'
#' @inherit dcsbm params
#' @inheritDotParams fastRG
#' @inherit fastRG return
#'
#' @export
#' @seealso [fastRG()]
#' @family stochastic block models
#'
#' @details TODO: write the model out in detail here
#'
#'   to remove DC, set theta=rep(1,n)
#'   xi  = theta_i * z_i, where z_i ~ dirichlet(alpha)
#'   lambda_ij = xi' B xj
#'   probability of i connecting to j:  1 - exp(-lambda_ij)
#'
#' @examples
#'
#' set.seed(27)
#'
#' n <- 1000
#' k <- 5
#'
#' B <- matrix(runif(k * k), nrow = k, ncol = k) # mixing probabilities
#'
#' theta <- round(rlnorm(n, 1))  # degree parameter
#' alpha <- rep(1, k)            # equiprobable communities
#'
#' A <- dc_mixed(theta, alpha, B, avg_deg = 50)
#'
#' params <- dc_mixed_params(theta, alpha, B, avg_deg = 50)
#'
#' expected(params$X, params$S)
#'
dc_mixed <- function(theta, alpha, B, avg_deg = NULL,
                     poisson_edges = TRUE, sort_nodes = FALSE, ...) {

  params <- dc_mixed_params(
    theta = theta, alpha = alpha, B = B,
    avg_deg = avg_deg,
    poisson_edges = poisson_edges,
    sort_nodes = sort_nodes
  )

  # NOTE: avg_deg is null since we handled scaling B internally
  fastRG(params$X, params$S, poisson_edges = poisson_edges, avg_deg = NULL, ...)
}

#' @rdname dc_mixed
#' @export
dc_mixed_params <- function(theta, alpha, B, avg_deg = NULL,
                            poisson_edges = TRUE, sort_nodes = FALSE) {

  if (length(theta) == 1) {
    n <- theta
    theta <- rep(1, n)
  }

  if (length(theta) > 1)
    n <- length(theta)

  K <- length(alpha)

  if (K != nrow(B) || ncol(B) != nrow(B)) {
    stop("Both dimensions of B must match length of alpha", call. = FALSE)
  }

  X <- t(igraph::sample_dirichlet(n, alpha))

  if (sort_nodes)
    X <- X[order(X %*% (1:K)), ]

  Theta <- Diagonal(n, theta)
  X <- Theta %*% X

  if (is.null(avg_deg)) {
    return(list(X = X, S = B, Y = X))
  }

  # scale B just like in fastRG()
  B <- B * avg_deg / expected(X, B)$degree

  if (!poisson_edges) {

    if (max(B) > 1)
      stop(
        "Expected edge values must be not exceed 1 for bernoulli graphs. ",
        "Either diminish `avg_deg` or set `poisson_edges = TRUE`.",
        call. = FALSE
      )

    # we're still sampling from a Poisson distribution, but B has been
    # specified as Bernoulli edges probabilities. convert these edges
    # probabilities such that we can feed them into a Poisson sampling
    # procedure

    B <- -log(1 - B)
  }

  # TODO: return latent communities
  list(X = X, S = B, Y = X)
}

A trick to compute the svd of the population regularized Laplacian with a single extra line of code

library(fastRG)
#> Loading required package: Matrix

# construct any undirected factor model

k <- 5
n <- 1000
B <- matrix(stats::runif(k * k), nrow = k, ncol = k)

theta <- round(stats::rlnorm(n, 2))

pi <- c(1, 2, 4, 1, 1)

mod <- dcsbm(
  theta = theta,
  B = B,
  pi = pi,
  expected_degree = 50
)

# now suppose we want the population svds of E[A], E[L], and E[L_tau]

# we already have a method the gives us the svds of E[A] with high computational
# efficiency, using the low rank structure in E[A]
s_EA <- svds(mod)

# that is, we already
# have code that computes the svds of E[A] = XSX'. we know that
# L_tau = D_tau A D_tau, and thus we have (roughly)
# E[L_tau] = E[D_tau] E[A] E[D_tau] = E[D_tau] XSX' E[D_tau]
# so we're gonna construct a new object with X_new = E[D_tau] X and run
# the old svd code on it

# we don't actually need to form the pop_D_tau matrix explicitly
# because rowScale does this for us, but here's what that would look like

# pop_degs <- expected_degrees(mod)
# pop_D_tau <- Diagonal(n = mod$n, x = 1 / sqrt(pop_degs + tau))
# X_new <- rowScale(mod$X, 1 / sqrt(pop_degs + tau))

# the very concise version of this
tau <- 10
mod$X <- rowScale(mod$X, 1 / sqrt(expected_degrees(mod) + tau))

# getting population eigenvalues of E[L_tau] only takes a single additional line
# of code
s_EL <- svds(mod)

# NOTE! Do not call `sample_sparse(mod)` anymore, it won't sample from the model
# you expect

# quick check that the SVDs are different
s_EL$d - s_EA$d
#> [1] -114.024473  -27.336110  -10.149304   -5.945424   -4.218959

^{Created on 2023-07-21 with reprex v2.0.2}

Release fastRG 0.3.2

Prepare for release:

Submit to CRAN:

usethis::use_version('patch')
devtools::submit_cran()
Approve email

Wait for CRAN...

Add directed planted partition

#' Sample a directed, balanced planted partition model
#'
#' @param n Number of nodes in graph.
#'
#' @param k Number of incoming (and outgoing) planted partitions.
#'   There will be `k` incoming and `k` outgoing communities. In the future,
#'   we may allow different numbers of incoming and outgoing communities
#'   provided there is interest. Should be an integer.
#'
#' @param within_block Probability of within block edges. Must be
#'   strictly between zero and one. Must specify either
#'   `within_block` and `between_block`, or `a` and `b` to determine
#'   edge probabilities.
#'
#' @param between_block Probability of between block edges. Must be
#'   strictly between zero and one. Must specify either
#'   `within_block` and `between_block`, or `a` and `b` to determine
#'   edge probabilities.
#'
#' @param a Integer such that `a/n` is the probability of edges
#'   within a block. Useful for sparse graphs. Must specify either
#'   `within_block` and `between_block`, or `a` and `b` to determine
#'   edge probabilities.
#'
#' @param b Integer such that `b/n` is the probability of edges
#'   between blocks. Useful for sparse graphs. Must specify either
#'   `within_block` and `between_block`, or `a` and `b` to determine
#'   edge probabilities.
#'
#' @param sort_nodes Logical indicating whether or not to sort the nodes
#'   so that they are grouped by block. This incurs the the cost of a
#'   radix sort, which is linear in the number of nodes in the graph.
#'   Defaults to `FALSE`. Useful for plotting.
#'
#' @inheritDotParams fastRG
#'
#' @inherit fastRG params return
#'
#' @export
#' @seealso [fastRG()]
#' @family stochastic block models
#' @family bernoulli graphs
#' @family directed graphs
#'
#' @details TODO: describe the generative model.
#'
#' @examples
#'
#' set.seed(27)
#'
#' A <- di_planted_partition(
#'   n = 1000,
#'   k = 3,
#'   within_block = 0.8,
#'   between_block = 0.2
#' )
#'
#' B <- di_planted_partition(
#'   n = 1000,
#'   k = 3,
#'   a = 10,
#'   b = 4
#' )
#'
di_planted_partition <- function(n, k, within_block = NULL,
                                 between_block = NULL, a = NULL, b = NULL,
                                 sort_nodes = FALSE, ...) {

  params <- di_planted_partition_params(
    n = n, k = k, a = a, b = b, within_block = within_block,
    between_block = between_block, sort_nodes = sort_nodes
  )

  # NOTE: avg_deg is null since we handled scaling B internally
  fastRG(
    params$X, params$S, params$Y,
    poisson_edges = FALSE,
    directed = TRUE,
    ...
  )
}

#' @rdname planted_partition
#' @export
di_planted_partition_params <- function(n, k, within_block = NULL,
                                        between_block = NULL, a = NULL,
                                        b = NULL, sort_nodes = FALSE, ...) {

  if (k > n) {
    stop("`k` must be less than or equal to `n`.", call. = FALSE)
  }

  if (!is.null(within_block) && length(within_block) != 1) {
    stop("`within_block` must be a scalar.", call. = FALSE)
  }

  if (!is.null(between_block) && length(between_block) != 1) {
    stop("`between_block` must be a scalar.", call. = FALSE)
  }

  if (!is.null(within_block) && (within_block < 0 || within_block > 1)) {
    stop("`within_block` must be between zero and one.", call. = FALSE)
  }

  if (!is.null(between_block) && (between_block < 0 || between_block > 1)) {
    stop("`between_block` must be between zero and one.", call. = FALSE)
  }

  if (!is.null(a) && a < 0) {
    stop("`a` must be greater than zero.", call. = FALSE)
  }

  if (!is.null(b) && b < 0) {
    stop("`b` must be greater than zero.", call. = FALSE)
  }

  # TODO: more input validation on a and b

  if (is.null(within_block)) {
    within_block <- a / n
  }

  if (is.null(between_block)) {
    between_block <- b / n
  }

  # just a special case of the SBM, so leverage that infrastructure
  pi <- rep(1 / k, k)

  B <- matrix(between_block, nrow = k, ncol = k)
  diag(B) <- within_block

  disbm_params(n, pi, pi, B, poisson_edges = FALSE, sort_nodes = sort_nodes)
}

rohelab / fastrg Goto Github PK

fastrg's Issues

Some blockmodel examples

Recommend Projects

Recommend Topics

Recommend Org