human-microbiome-compendium/multipartite_fungustree.R

185 lines
7.2 KiB
R

# library(GREMLINS) # Load the custom GREMLINS
devtools::load_all("../GREMLINS/")
# library(sbm)
devtools::load_all("../sbm/")
data(fungusTreeNetwork)
tree_genetic_dist_matrix <- fungusTreeNetwork$covar_tree$genetic_dist
tree_taxonomic_dist_matrix <- (fungusTreeNetwork$covar_tree$taxonomic_dist)
tree_genet_sbm <- estimateSimpleSBM(netMat = tree_genetic_dist_matrix, model = "gaussian", estimOptions = list(plot = FALSE))
plot(tree_genet_sbm)
tree_taxonomic_sbm <- estimateSimpleSBM(netMat = tree_taxonomic_dist_matrix, model = "gaussian", estimOptions = list(plot = FALSE))
plot(tree_taxonomic_sbm)
tree_geo_sbm <- estimateSimpleSBM(netMat = fungusTreeNetwork$covar_tree$geographic_dist, model = "gaussian", estimOptions = list(plot = FALSE))
plot(tree_geo_sbm)
fungus_tree_sbm <- estimateBipartiteSBM(netMat = fungusTreeNetwork$fungus_tree, model = "bernoulli", estimOptions = list(plot = FALSE))
Tree_Geo_dist <- defineNetwork(fungusTreeNetwork$covar_tree$geographic_dist, type = "adj", "Tree", "Tree")
Tree_Genet_dist <- defineNetwork(fungusTreeNetwork$covar_tree$genetic_dist, type = "adj", "Tree", "Tree")
FungusTree_Count <- defineNetwork(fungusTreeNetwork$fungus_tree, type = "inc", "Fungus", "Tree")
list_Net <- list(Tree_Geo_dist, FungusTree_Count)
v_distrib <- c("gaussian", "bernoulli")
namesFG <- c("Tree", "Fungus")
givenclassif <- list(tree_geo_sbm$memberships, fungus_tree_sbm$memberships[["row"]])
# geo_multi_noinit <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = TRUE, verbose = TRUE)
Tree_Geo_dist_sbm <- defineSBM(-fungusTreeNetwork$covar_tree$geographic_dist, model = "gaussian", type = "simple", dimLabels = c("Tree"))
Tree_Taxo_dist_sbm <- defineSBM(-fungusTreeNetwork$covar_tree$taxonomic_dist, model = "gaussian", type = "simple", dimLabels = c("Tree"))
Tree_Genetic_dist_sbm <- defineSBM(-fungusTreeNetwork$covar_tree$genetic_dist, model = "gaussian", type = "simple", dimLabels = c("Tree"))
FungusTree_sbm <- defineSBM(fungusTreeNetwork$fungus_tree, dimLabels = c("Fungus", "Tree"), model = "bernoulli", type = "bipartite")
# geo_multi_noinit_sbm <- estimateMultipartiteSBM(list(Tree_Geo_dist_sbm, FungusTree_sbm), estimOptions = list(initBM = TRUE))
# geo_multi_noinit_nobm <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = FALSE, verbose = TRUE)
# # Les kmins donnent le même résultat ICL -449.21
# geo_multi_noinit_nobm_kmin <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = FALSE, verbose = TRUE, v_Kmin = c(7, 2))
# geo_multi_noinit_kmin <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = TRUE, verbose = TRUE, v_Kmin = c(7, 2))
# geo_multi_init_kmin <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = givenclassif, v_Kmin = c(7, 2)) # Ne s'autorise pas à descendre
# ##
# # Le meilleur ICL est ici : -408.05 avec une classif de base issue de blockmodels indépendants
# geo_multi_init <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = givenclassif)
# library(knitr)
# kable(data.frame("Given classif" = c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE), "InitBM" = c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE), "Kmin given" = c(FALSE, FALSE, TRUE, TRUE, TRUE, FALSE), ICL = c(
# geo_multi_noinit$fittedModel[[1]]$ICL,
# geo_multi_noinit_nobm$fittedModel[[1]]$ICL,
# geo_multi_noinit_nobm_kmin$fittedModel[[1]]$ICL,
# geo_multi_noinit_kmin$fittedModel[[1]]$ICL,
# geo_multi_init_kmin$fittedModel[[1]]$ICL,
# geo_multi_init$fittedModel[[1]]$ICL
# )), format = "markdown")
conditions <- expand.grid(given = c(TRUE, FALSE), initBM = c(TRUE, FALSE), Kmin = c(TRUE, FALSE), rep = seq(3))
library(future.apply)
library(future.callr)
plan("multisession")
results_geo_df <- do.call("rbind", future_lapply(seq_len(nrow(conditions)), function(row_idx) {
initBM <- conditions[row_idx, "initBM"]
is_given <- conditions[row_idx, "given"]
is_Kmin <- conditions[row_idx, "Kmin"]
rep <- conditions[row_idx, "rep"]
if (is_given) {
the_givenclassif <- givenclassif
} else {
the_givenclassif <- NULL
}
if (is_Kmin) {
the_Kmin <- list(seq(7, 10), seq(2, 10))
} else {
the_Kmin <- list(seq(10), seq(10))
}
names(the_Kmin) <- c("Tree", "Fungus")
fit <- estimateMultipartiteSBM(
listSBM = list(Tree_Geo_dist_sbm, FungusTree_sbm),
estimOptions = list(
"initBM" = initBM,
"givenclassif" = the_givenclassif,
"nbBlocksRange" = the_Kmin,
maxVEM = 1000,
maxVE = 1000
)
)
out <- data.frame(
InitBM = initBM,
Kmin = is_Kmin,
GivenClassif = is_given,
rep = rep,
ICL = fit$ICL,
nbBlocksTree = fit$nbBlocks["Tree"], nbBlocksFungus = fit$nbBlocks["Fungus"]
)
}, future.seed = TRUE))
# future_
results_taxo_df <- do.call("rbind", lapply(seq_len(nrow(conditions)), function(row_idx) {
initBM <- conditions[row_idx, "initBM"]
is_given <- conditions[row_idx, "given"]
is_Kmin <- conditions[row_idx, "Kmin"]
rep <- conditions[row_idx, "rep"]
if (is_given) {
the_givenclassif <- givenclassif
} else {
the_givenclassif <- NULL
}
if (is_Kmin) {
the_Kmin <- list(seq(1, 10), seq(1, 10))
} else {
the_Kmin <- list(seq(10), seq(10))
}
names(the_Kmin) <- c("Tree", "Fungus")
fit <- estimateMultipartiteSBM(
listSBM = list(Tree_Taxo_dist_sbm, FungusTree_sbm),
estimOptions = list(
"initBM" = initBM,
"givenclassif" = the_givenclassif,
"nbBlocksRange" = the_Kmin,
maxVEM = 100000,
maxVE = 100000
)
)
out <- data.frame(
InitBM = initBM,
Kmin = is_Kmin,
GivenClassif = is_given,
rep = rep,
ICL = fit$ICL,
nbBlocksTree = fit$nbBlocks["Tree"], nbBlocksFungus = fit$nbBlocks["Fungus"]
)
})) # , future.seed = TRUE))
results_genetic_df <- do.call("rbind", lapply(seq_len(nrow(conditions)), function(row_idx) {
initBM <- conditions[row_idx, "initBM"]
is_given <- conditions[row_idx, "given"]
is_Kmin <- conditions[row_idx, "Kmin"]
rep <- conditions[row_idx, "rep"]
if (is_given) {
the_givenclassif <- givenclassif
} else {
the_givenclassif <- NULL
}
if (is_Kmin) {
the_Kmin <- list(seq(4, 10), seq(2, 10))
} else {
the_Kmin <- list(seq(10), seq(10))
}
names(the_Kmin) <- c("Tree", "Fungus")
fit <- estimateMultipartiteSBM(
listSBM = list(Tree_Genetic_dist_sbm, FungusTree_sbm),
estimOptions = list(
"initBM" = initBM,
"givenclassif" = the_givenclassif,
"nbBlocksRange" = the_Kmin,
maxVEM = 100000,
maxVE = 100000
)
)
out <- data.frame(
InitBM = initBM,
Kmin = is_Kmin,
GivenClassif = is_given,
rep = rep,
ICL = fit$ICL,
nbBlocksTree = fit$nbBlocks["Tree"], nbBlocksFungus = fit$nbBlocks["Fungus"]
)
})) # , future.seed = TRUE))