library(phyloseq) library(ggplot2) library(sbm) library(biomformat) source("utils.R") # data("enterotype") # the_data <- enterotype # data("mach") the_data <- import_biom("data/chaillou/chaillou.biom") # the_data <- import_biom("data/mach/kinetic.biom") # the_data <- import_biom("data/ravel/ravel.biom") net_tables <- collapse_otu_at_taxo(the_data) net_table <- as.matrix(net_tables[[3]]) dim(net_table) nb_inter <- sum(net_table > 0) all_possible_inter <- nrow(net_table) * ncol(net_table) (density <- nb_inter / all_possible_inter) res <- estimateBipartiteSBM(net_table, dimLabels = c("OTU", "Sample"), model = "poisson", estimOptions = list(plot = 0)) plot(res, type = "data") sdCol <- function(matrix) { sapply(seq_len(ncol(matrix)), function(col) sd(matrix[, col])) } t_net_table <- t(net_table) std_otu <- sdCol(t_net_table) mean_otu <- rowMeans(net_table) stats_df <- data.frame(otu = rownames(net_table), mean = mean_otu, std = std_otu, var = std_otu^2) ggplot(stats_df, aes(x = mean, y = var)) + geom_line() + geom_point() + geom_abline(slope = 1, color = "red") + scale_y_log10() + scale_x_log10() + ggtitle("OTU for log(mean) = log(var)")