Compare commits

..

7 commits

Author SHA1 Message Date
Louis
d618e1b5d7 Add sge 2026-02-23 14:04:24 +01:00
Louis
3a1c84443c BM correct sampling 2026-02-23 14:01:35 +01:00
Louis
be7453fb9d For merging lbm-seq results 2026-02-23 11:17:58 +01:00
Louis
e7f41d56f7 Renaming biom data 2026-02-23 11:17:41 +01:00
Louis
97b4a43597 Usage of Blockmodels with poisson covariates to correct sequencing effort 2026-02-19 10:40:01 +01:00
Louis
6fa32f2157 Multipartite fungus tree with all dist matrix 2026-02-19 10:39:36 +01:00
Louis
65e305e297 Update stats panels with sequencing effort 2026-02-19 10:39:15 +01:00
6 changed files with 278 additions and 22 deletions

View file

@ -0,0 +1,45 @@
library(blockmodels)
library(phyloseq)
library(biomformat)
library(here)
source("utils.R")
options(parallelly.availableCores.custom = function() {
ncores <- min(parallelly::availableCores(), 64)
max(1L, ncores)
})
message(paste("Number of cores available:", parallelly::availableCores()))
args <- commandArgs(trailingOnly = TRUE)
if (length(args) == 0) {
author <- "mach"
} else {
author <- args[1]
}
message("For author: ", author)
the_data <- import_biom(here("data", author, paste0(author, ".biom")))
epoch <- as.integer(Sys.time())
level <- 2
message("At level: ", level, " starting at: ", epoch)
save_path <- here("results", "seq-effort", paste0(author, "_", level, "_", epoch, ".Rds"))
per_taxa_networks <- collapse_otu_at_taxo(the_data)
covar_mat <- t(colSums(per_taxa_networks[[level]]) %*% t(rep(1, nrow(per_taxa_networks[[level]]))))
lcovar_mat <- log(covar_mat)
fitted_model <- BM_poisson_covariates("LBM",
adj = per_taxa_networks[[level]],
covariates = list(lcovar_mat),
autosave = save_path,
ncores = parallelly::availableCores()
)
fitted_model$estimate()
fitted_model$model_parameters[[which.max(fitted_model$ICL)]]
message("Saving the model results.")
saveRDS(fitted_model, save_path)

20
merge-lbm-seq.R Normal file
View file

@ -0,0 +1,20 @@
library(stringr)
args <- commandArgs(trailingOnly = TRUE)
path <- args[1]
mode <- args[2]
base_folder <- str_remove(string = path, pattern = "/tmp[0-9]*$")
print(base_folder)
epoch <- str_extract(string = path, pattern = "(?<=tmp)([0-9]*)")
flist <- list.files(path, pattern = paste0(mode, ".Rds"), full.names = TRUE)
stopifnot("No files corresponding to mode found." = length(flist) > 0)
merged_res <- lapply(flist, readRDS)
names(merged_res) <- paste0("Rank", seq_along(flist) + 1)
saveRDS(merged_res, file.path(base_folder, paste0(mode, "_", epoch, ".Rds")))

View file

@ -1,6 +1,7 @@
# library(GREMLINS) # Load the custom GREMLINS
devtools::load_all("../GREMLINS/")
library(sbm)
# library(sbm)
devtools::load_all("../sbm/")
data(fungusTreeNetwork)
tree_genetic_dist_matrix <- fungusTreeNetwork$covar_tree$genetic_dist
@ -26,34 +27,159 @@ v_distrib <- c("gaussian", "bernoulli")
namesFG <- c("Tree", "Fungus")
givenclassif <- list(tree_geo_sbm$memberships, fungus_tree_sbm$memberships[["row"]])
geo_multi_noinit <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = TRUE, verbose = TRUE)
# geo_multi_noinit <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = TRUE, verbose = TRUE)
Tree_Geo_dist_sbm <- defineSBM(-fungusTreeNetwork$covar_tree$geographic_dist, model = "gaussian", type = "simple", dimLabels = c("Tree"))
Tree_Taxo_dist_sbm <- defineSBM(-fungusTreeNetwork$covar_tree$taxonomic_dist, model = "gaussian", type = "simple", dimLabels = c("Tree"))
Tree_Genetic_dist_sbm <- defineSBM(fungusTreeNetwork$covar_tree$genetic_dist, model = "gaussian", type = "simple", dimLabels = c("Tree"))
Tree_Genetic_dist_sbm <- defineSBM(-fungusTreeNetwork$covar_tree$genetic_dist, model = "gaussian", type = "simple", dimLabels = c("Tree"))
FungusTree_sbm <- defineSBM(fungusTreeNetwork$fungus_tree, dimLabels = c("Fungus", "Tree"), model = "bernoulli", type = "bipartite")
geo_multi_noinit_sbm <- estimateMultipartiteSBM(list(Tree_Geo_dist_sbm, FungusTree_sbm), estimOptions = list(initBM = TRUE, ))
# geo_multi_noinit_sbm <- estimateMultipartiteSBM(list(Tree_Geo_dist_sbm, FungusTree_sbm), estimOptions = list(initBM = TRUE))
geo_multi_noinit_nobm <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = FALSE, verbose = TRUE)
# Les kmins donnent le même résultat ICL -449.21
geo_multi_noinit_nobm_kmin <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = FALSE, verbose = TRUE, v_Kmin = c(7, 2))
geo_multi_noinit_kmin <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = TRUE, verbose = TRUE, v_Kmin = c(7, 2))
geo_multi_init_kmin <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = givenclassif, v_Kmin = c(7, 2)) # Ne s'autorise pas à descendre
##
# geo_multi_noinit_nobm <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = FALSE, verbose = TRUE)
# # Les kmins donnent le même résultat ICL -449.21
# geo_multi_noinit_nobm_kmin <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = FALSE, verbose = TRUE, v_Kmin = c(7, 2))
# geo_multi_noinit_kmin <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = TRUE, verbose = TRUE, v_Kmin = c(7, 2))
# geo_multi_init_kmin <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = givenclassif, v_Kmin = c(7, 2)) # Ne s'autorise pas à descendre
# ##
# Le meilleur ICL est ici : -408.05 avec une classif de base issue de blockmodels indépendants
geo_multi_init <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = givenclassif)
# # Le meilleur ICL est ici : -408.05 avec une classif de base issue de blockmodels indépendants
# geo_multi_init <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = givenclassif)
library(knitr)
# library(knitr)
kable(data.frame("Given classif" = c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE), "InitBM" = c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE), "Kmin given" = c(FALSE, FALSE, TRUE, TRUE, TRUE, FALSE), ICL = c(
geo_multi_noinit$fittedModel[[1]]$ICL,
geo_multi_noinit_nobm$fittedModel[[1]]$ICL,
geo_multi_noinit_nobm_kmin$fittedModel[[1]]$ICL,
geo_multi_noinit_kmin$fittedModel[[1]]$ICL,
geo_multi_init_kmin$fittedModel[[1]]$ICL,
geo_multi_init$fittedModel[[1]]$ICL
)), format = "markdown")
# kable(data.frame("Given classif" = c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE), "InitBM" = c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE), "Kmin given" = c(FALSE, FALSE, TRUE, TRUE, TRUE, FALSE), ICL = c(
# geo_multi_noinit$fittedModel[[1]]$ICL,
# geo_multi_noinit_nobm$fittedModel[[1]]$ICL,
# geo_multi_noinit_nobm_kmin$fittedModel[[1]]$ICL,
# geo_multi_noinit_kmin$fittedModel[[1]]$ICL,
# geo_multi_init_kmin$fittedModel[[1]]$ICL,
# geo_multi_init$fittedModel[[1]]$ICL
# )), format = "markdown")
dataR6 <- formattingData(list_Net, v_distrib)
conditions <- expand.grid(given = c(TRUE, FALSE), initBM = c(TRUE, FALSE), Kmin = c(TRUE, FALSE), rep = seq(3))
library(future.apply)
library(future.callr)
plan("multisession")
results_geo_df <- do.call("rbind", future_lapply(seq_len(nrow(conditions)), function(row_idx) {
initBM <- conditions[row_idx, "initBM"]
is_given <- conditions[row_idx, "given"]
is_Kmin <- conditions[row_idx, "Kmin"]
rep <- conditions[row_idx, "rep"]
if (is_given) {
the_givenclassif <- givenclassif
} else {
the_givenclassif <- NULL
}
if (is_Kmin) {
the_Kmin <- list(seq(7, 10), seq(2, 10))
} else {
the_Kmin <- list(seq(10), seq(10))
}
names(the_Kmin) <- c("Tree", "Fungus")
fit <- estimateMultipartiteSBM(
listSBM = list(Tree_Geo_dist_sbm, FungusTree_sbm),
estimOptions = list(
"initBM" = initBM,
"givenclassif" = the_givenclassif,
"nbBlocksRange" = the_Kmin,
maxVEM = 1000,
maxVE = 1000
)
)
out <- data.frame(
InitBM = initBM,
Kmin = is_Kmin,
GivenClassif = is_given,
rep = rep,
ICL = fit$ICL,
nbBlocksTree = fit$nbBlocks["Tree"], nbBlocksFungus = fit$nbBlocks["Fungus"]
)
}, future.seed = TRUE))
# future_
results_taxo_df <- do.call("rbind", lapply(seq_len(nrow(conditions)), function(row_idx) {
initBM <- conditions[row_idx, "initBM"]
is_given <- conditions[row_idx, "given"]
is_Kmin <- conditions[row_idx, "Kmin"]
rep <- conditions[row_idx, "rep"]
if (is_given) {
the_givenclassif <- givenclassif
} else {
the_givenclassif <- NULL
}
if (is_Kmin) {
the_Kmin <- list(seq(1, 10), seq(1, 10))
} else {
the_Kmin <- list(seq(10), seq(10))
}
names(the_Kmin) <- c("Tree", "Fungus")
fit <- estimateMultipartiteSBM(
listSBM = list(Tree_Taxo_dist_sbm, FungusTree_sbm),
estimOptions = list(
"initBM" = initBM,
"givenclassif" = the_givenclassif,
"nbBlocksRange" = the_Kmin,
maxVEM = 100000,
maxVE = 100000
)
)
out <- data.frame(
InitBM = initBM,
Kmin = is_Kmin,
GivenClassif = is_given,
rep = rep,
ICL = fit$ICL,
nbBlocksTree = fit$nbBlocks["Tree"], nbBlocksFungus = fit$nbBlocks["Fungus"]
)
})) # , future.seed = TRUE))
results_genetic_df <- do.call("rbind", lapply(seq_len(nrow(conditions)), function(row_idx) {
initBM <- conditions[row_idx, "initBM"]
is_given <- conditions[row_idx, "given"]
is_Kmin <- conditions[row_idx, "Kmin"]
rep <- conditions[row_idx, "rep"]
if (is_given) {
the_givenclassif <- givenclassif
} else {
the_givenclassif <- NULL
}
if (is_Kmin) {
the_Kmin <- list(seq(4, 10), seq(2, 10))
} else {
the_Kmin <- list(seq(10), seq(10))
}
names(the_Kmin) <- c("Tree", "Fungus")
fit <- estimateMultipartiteSBM(
listSBM = list(Tree_Genetic_dist_sbm, FungusTree_sbm),
estimOptions = list(
"initBM" = initBM,
"givenclassif" = the_givenclassif,
"nbBlocksRange" = the_Kmin,
maxVEM = 100000,
maxVE = 100000
)
)
out <- data.frame(
InitBM = initBM,
Kmin = is_Kmin,
GivenClassif = is_given,
rep = rep,
ICL = fit$ICL,
nbBlocksTree = fit$nbBlocks["Tree"], nbBlocksFungus = fit$nbBlocks["Fungus"]
)
})) # , future.seed = TRUE))

View file

@ -0,0 +1,48 @@
#!/usr/bin/env bash
#$ -V
#$ -cwd
#$ -N bmcorrectsamp
#$ -m besa
#$ -q short.q
#$ -t 1-6
#$ -pe thread 64
#$ -M louis.lacoste+migale@agroparistech.fr
#$ -o logs/$JOB_NAME
#$ -e logs/$JOB_NAME
# Creating log directory if it doesn't exists
BASE_DIR="/home/$USER/work/human-microbiome-compendium"
LOG_DIR=$(echo "$BASE_DIR/logs")
if [ ! -d "$LOG_DIR" ]; then
mkdir -p $LOG_DIR
fi
# Finding directory
APPLICATIONS_DIR=$(echo "$BASE_DIR")
echo $APPLICATIONS_DIR
ARGID=$(($SGE_TASK_ID % 3))
case $ARGID in
0)
echo -n "Model will be Chaillou"
MODE="chaillou"
;;
1)
echo -n "Model will be Ravel"
MODE="ravel"
;;
2)
echo -n "Model will be Mach"
MODE="mach"
;;
esac
readonly NB_CORES=$(sed -n 's/^#\$ -pe thread \(.*\)/\1/p' -- "$0")
echo -n "Model will be $MODE with $NB_CORES"
Rscript "${APPLICATIONS_DIR}/blockmodels_correct_sampling_effort.R.R" $MODE &>> logs/$JOB_NAME.$SGE_TASK_ID

View file

@ -91,6 +91,7 @@ otu_table_plot
```
```{r}
#| eval: false
# Below the graph of taxa abundance
# otus <- per_taxa_no_names <- collapse_otu_at_taxo(phylo_data = the_data, renameOTUs = FALSE)
otu_mat <- as(otu_table(the_data), "matrix")
@ -170,4 +171,20 @@ kables_sd <- lapply(per_taxa_network, function(matrix) {
kable(sd_df, row.names = FALSE)
})
kables_sd
```
```{r}
boxplots <- lapply(per_taxa_network, function(matrix) {
df_mat <- stack(as.data.frame(t(matrix)))
ggplot(df_mat, aes(y = values, x = ind, fill = ind)) +
geom_boxplot()
})
boxplots[-7]
```
```{r}
seq_effort <- lapply(per_taxa_network, function(matrix) {
colSums(matrix)
})
seq_effort
```