diff --git a/lbm_full_mat.R b/lbm_full_mat.R new file mode 100644 index 0000000..1496433 --- /dev/null +++ b/lbm_full_mat.R @@ -0,0 +1,80 @@ +library(sbm) +library(ggplot2) +library(readr) + +source("load-full.R") + +sdCol <- function(matrix) { + sapply(seq_len(ncol(matrix)), function(col) sd(matrix[, col])) +} + +net_table <- otu_table(cpd_phyloseq) +rm(cpd_phyloseq) +write.csv(net_table, "data/otu_matrix.csv") +zip("data/otu_matrix.csv.zip", "data/otu_matrix.csv") +unlink("data/otu_matrix.csv") +# Ne tourne pas, probablement gros calculs matriciel +# res <- estimateBipartiteSBM(netMat = net_table, dimLabels = c("OTU", "Sample"), model = "poisson") + +nb_inter <- sum(net_table > 0) +all_possible_inter <- nrow(net_table) * ncol(net_table) +(density <- nb_inter / all_possible_inter) + +t_net_table <- t(net_table) +std_otu <- sdCol(t_net_table) +mean_otu <- rowMeans(net_table) + +stats_df <- data.frame(otu = rownames(net_table), mean = mean_otu, std = std_otu, var = std_otu^2) +ggplot(stats_df, aes(y = otu, x = mean)) + + geom_point() + + geom_errorbarh(aes(xmin = mean - std, xmax = mean + std), orientation = "y") + + theme( + axis.title.y = element_blank(), + axis.text.y = element_blank(), + axis.ticks.y = element_blank() + ) + +ggplot(stats_df, aes(x = mean, y = var)) + + geom_line() + + geom_point() + + geom_abline(slope = 1, color = "red") + + scale_y_log10() + + scale_x_log10() + + ggtitle("OTU for log(mean) = log(var)") + +ggplot(stats_df, aes(x = mean, y = var / mean)) + + geom_line() + + geom_point() + + geom_hline(yintercept = 1, color = "red") + + ggtitle("OTU for f(mean) = var/mean = 1") + +ggplot(stats_df, aes(x = 1 / mean, y = var / mean^2)) + + geom_line() + + geom_point() + + geom_abline(slope = 1, color = "red") + + scale_y_log10() + + scale_x_log10() + + ggtitle("OTU for log(1/mean) = log(var/mean^2)") + +lambdas <- seq(1, 100) +realization_pois <- sapply(lambdas, rpois, n = 1000) +r_pois_df <- data.frame(mean = colMeans(realization_pois), var = sdCol(realization_pois)^2) + +ggplot(r_pois_df, aes(x = mean, y = var)) + + geom_line() + + geom_point() + + geom_abline(slope = 1, color = "red") + + ggtitle("Poisson for mean = var") + +ggplot(r_pois_df, aes(x = mean, y = var / mean)) + + geom_line() + + geom_point() + + geom_hline(yintercept = 1, color = "red") + + ggtitle("Poisson for f(mean) = var/mean = 1") + + +ggplot(r_pois_df, aes(x = 1 / mean, y = var / mean^2)) + + geom_line() + + geom_point() + + geom_abline(slope = 1, color = "red") + + ggtitle("Poisson for 1/lambda = var/mean^2")