Compare commits
7 commits
29728d8205
...
d618e1b5d7
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d618e1b5d7 | ||
|
|
3a1c84443c | ||
|
|
be7453fb9d | ||
|
|
e7f41d56f7 | ||
|
|
97b4a43597 | ||
|
|
6fa32f2157 | ||
|
|
65e305e297 |
6 changed files with 278 additions and 22 deletions
45
blockmodels_correct_sampling_effort.R
Normal file
45
blockmodels_correct_sampling_effort.R
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
library(blockmodels)
|
||||
library(phyloseq)
|
||||
library(biomformat)
|
||||
library(here)
|
||||
source("utils.R")
|
||||
|
||||
options(parallelly.availableCores.custom = function() {
|
||||
ncores <- min(parallelly::availableCores(), 64)
|
||||
max(1L, ncores)
|
||||
})
|
||||
|
||||
message(paste("Number of cores available:", parallelly::availableCores()))
|
||||
args <- commandArgs(trailingOnly = TRUE)
|
||||
if (length(args) == 0) {
|
||||
author <- "mach"
|
||||
} else {
|
||||
author <- args[1]
|
||||
}
|
||||
|
||||
message("For author: ", author)
|
||||
the_data <- import_biom(here("data", author, paste0(author, ".biom")))
|
||||
|
||||
epoch <- as.integer(Sys.time())
|
||||
level <- 2
|
||||
message("At level: ", level, " starting at: ", epoch)
|
||||
|
||||
save_path <- here("results", "seq-effort", paste0(author, "_", level, "_", epoch, ".Rds"))
|
||||
|
||||
per_taxa_networks <- collapse_otu_at_taxo(the_data)
|
||||
|
||||
covar_mat <- t(colSums(per_taxa_networks[[level]]) %*% t(rep(1, nrow(per_taxa_networks[[level]]))))
|
||||
|
||||
lcovar_mat <- log(covar_mat)
|
||||
|
||||
fitted_model <- BM_poisson_covariates("LBM",
|
||||
adj = per_taxa_networks[[level]],
|
||||
covariates = list(lcovar_mat),
|
||||
autosave = save_path,
|
||||
ncores = parallelly::availableCores()
|
||||
)
|
||||
fitted_model$estimate()
|
||||
|
||||
fitted_model$model_parameters[[which.max(fitted_model$ICL)]]
|
||||
message("Saving the model results.")
|
||||
saveRDS(fitted_model, save_path)
|
||||
20
merge-lbm-seq.R
Normal file
20
merge-lbm-seq.R
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
library(stringr)
|
||||
|
||||
args <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
path <- args[1]
|
||||
mode <- args[2]
|
||||
|
||||
base_folder <- str_remove(string = path, pattern = "/tmp[0-9]*$")
|
||||
print(base_folder)
|
||||
epoch <- str_extract(string = path, pattern = "(?<=tmp)([0-9]*)")
|
||||
|
||||
flist <- list.files(path, pattern = paste0(mode, ".Rds"), full.names = TRUE)
|
||||
|
||||
stopifnot("No files corresponding to mode found." = length(flist) > 0)
|
||||
|
||||
merged_res <- lapply(flist, readRDS)
|
||||
|
||||
names(merged_res) <- paste0("Rank", seq_along(flist) + 1)
|
||||
|
||||
saveRDS(merged_res, file.path(base_folder, paste0(mode, "_", epoch, ".Rds")))
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
# library(GREMLINS) # Load the custom GREMLINS
|
||||
devtools::load_all("../GREMLINS/")
|
||||
library(sbm)
|
||||
# library(sbm)
|
||||
devtools::load_all("../sbm/")
|
||||
|
||||
data(fungusTreeNetwork)
|
||||
tree_genetic_dist_matrix <- fungusTreeNetwork$covar_tree$genetic_dist
|
||||
|
|
@ -26,34 +27,159 @@ v_distrib <- c("gaussian", "bernoulli")
|
|||
namesFG <- c("Tree", "Fungus")
|
||||
givenclassif <- list(tree_geo_sbm$memberships, fungus_tree_sbm$memberships[["row"]])
|
||||
|
||||
geo_multi_noinit <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = TRUE, verbose = TRUE)
|
||||
# geo_multi_noinit <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = TRUE, verbose = TRUE)
|
||||
|
||||
Tree_Geo_dist_sbm <- defineSBM(-fungusTreeNetwork$covar_tree$geographic_dist, model = "gaussian", type = "simple", dimLabels = c("Tree"))
|
||||
Tree_Taxo_dist_sbm <- defineSBM(-fungusTreeNetwork$covar_tree$taxonomic_dist, model = "gaussian", type = "simple", dimLabels = c("Tree"))
|
||||
Tree_Genetic_dist_sbm <- defineSBM(fungusTreeNetwork$covar_tree$genetic_dist, model = "gaussian", type = "simple", dimLabels = c("Tree"))
|
||||
Tree_Genetic_dist_sbm <- defineSBM(-fungusTreeNetwork$covar_tree$genetic_dist, model = "gaussian", type = "simple", dimLabels = c("Tree"))
|
||||
FungusTree_sbm <- defineSBM(fungusTreeNetwork$fungus_tree, dimLabels = c("Fungus", "Tree"), model = "bernoulli", type = "bipartite")
|
||||
|
||||
geo_multi_noinit_sbm <- estimateMultipartiteSBM(list(Tree_Geo_dist_sbm, FungusTree_sbm), estimOptions = list(initBM = TRUE, ))
|
||||
# geo_multi_noinit_sbm <- estimateMultipartiteSBM(list(Tree_Geo_dist_sbm, FungusTree_sbm), estimOptions = list(initBM = TRUE))
|
||||
|
||||
geo_multi_noinit_nobm <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = FALSE, verbose = TRUE)
|
||||
# Les kmins donnent le même résultat ICL -449.21
|
||||
geo_multi_noinit_nobm_kmin <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = FALSE, verbose = TRUE, v_Kmin = c(7, 2))
|
||||
geo_multi_noinit_kmin <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = TRUE, verbose = TRUE, v_Kmin = c(7, 2))
|
||||
geo_multi_init_kmin <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = givenclassif, v_Kmin = c(7, 2)) # Ne s'autorise pas à descendre
|
||||
##
|
||||
# geo_multi_noinit_nobm <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = FALSE, verbose = TRUE)
|
||||
# # Les kmins donnent le même résultat ICL -449.21
|
||||
# geo_multi_noinit_nobm_kmin <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = FALSE, verbose = TRUE, v_Kmin = c(7, 2))
|
||||
# geo_multi_noinit_kmin <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = NULL, initBM = TRUE, verbose = TRUE, v_Kmin = c(7, 2))
|
||||
# geo_multi_init_kmin <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = givenclassif, v_Kmin = c(7, 2)) # Ne s'autorise pas à descendre
|
||||
# ##
|
||||
|
||||
# Le meilleur ICL est ici : -408.05 avec une classif de base issue de blockmodels indépendants
|
||||
geo_multi_init <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = givenclassif)
|
||||
# # Le meilleur ICL est ici : -408.05 avec une classif de base issue de blockmodels indépendants
|
||||
# geo_multi_init <- multipartiteBM(list_Net = list_Net, v_distrib = v_distrib, namesFG = namesFG, givenclassif = givenclassif)
|
||||
|
||||
library(knitr)
|
||||
# library(knitr)
|
||||
|
||||
kable(data.frame("Given classif" = c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE), "InitBM" = c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE), "Kmin given" = c(FALSE, FALSE, TRUE, TRUE, TRUE, FALSE), ICL = c(
|
||||
geo_multi_noinit$fittedModel[[1]]$ICL,
|
||||
geo_multi_noinit_nobm$fittedModel[[1]]$ICL,
|
||||
geo_multi_noinit_nobm_kmin$fittedModel[[1]]$ICL,
|
||||
geo_multi_noinit_kmin$fittedModel[[1]]$ICL,
|
||||
geo_multi_init_kmin$fittedModel[[1]]$ICL,
|
||||
geo_multi_init$fittedModel[[1]]$ICL
|
||||
)), format = "markdown")
|
||||
# kable(data.frame("Given classif" = c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE), "InitBM" = c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE), "Kmin given" = c(FALSE, FALSE, TRUE, TRUE, TRUE, FALSE), ICL = c(
|
||||
# geo_multi_noinit$fittedModel[[1]]$ICL,
|
||||
# geo_multi_noinit_nobm$fittedModel[[1]]$ICL,
|
||||
# geo_multi_noinit_nobm_kmin$fittedModel[[1]]$ICL,
|
||||
# geo_multi_noinit_kmin$fittedModel[[1]]$ICL,
|
||||
# geo_multi_init_kmin$fittedModel[[1]]$ICL,
|
||||
# geo_multi_init$fittedModel[[1]]$ICL
|
||||
# )), format = "markdown")
|
||||
|
||||
dataR6 <- formattingData(list_Net, v_distrib)
|
||||
conditions <- expand.grid(given = c(TRUE, FALSE), initBM = c(TRUE, FALSE), Kmin = c(TRUE, FALSE), rep = seq(3))
|
||||
|
||||
library(future.apply)
|
||||
library(future.callr)
|
||||
|
||||
plan("multisession")
|
||||
|
||||
results_geo_df <- do.call("rbind", future_lapply(seq_len(nrow(conditions)), function(row_idx) {
|
||||
initBM <- conditions[row_idx, "initBM"]
|
||||
is_given <- conditions[row_idx, "given"]
|
||||
is_Kmin <- conditions[row_idx, "Kmin"]
|
||||
rep <- conditions[row_idx, "rep"]
|
||||
|
||||
if (is_given) {
|
||||
the_givenclassif <- givenclassif
|
||||
} else {
|
||||
the_givenclassif <- NULL
|
||||
}
|
||||
|
||||
if (is_Kmin) {
|
||||
the_Kmin <- list(seq(7, 10), seq(2, 10))
|
||||
} else {
|
||||
the_Kmin <- list(seq(10), seq(10))
|
||||
}
|
||||
|
||||
names(the_Kmin) <- c("Tree", "Fungus")
|
||||
|
||||
fit <- estimateMultipartiteSBM(
|
||||
listSBM = list(Tree_Geo_dist_sbm, FungusTree_sbm),
|
||||
estimOptions = list(
|
||||
"initBM" = initBM,
|
||||
"givenclassif" = the_givenclassif,
|
||||
"nbBlocksRange" = the_Kmin,
|
||||
maxVEM = 1000,
|
||||
maxVE = 1000
|
||||
)
|
||||
)
|
||||
out <- data.frame(
|
||||
InitBM = initBM,
|
||||
Kmin = is_Kmin,
|
||||
GivenClassif = is_given,
|
||||
rep = rep,
|
||||
ICL = fit$ICL,
|
||||
nbBlocksTree = fit$nbBlocks["Tree"], nbBlocksFungus = fit$nbBlocks["Fungus"]
|
||||
)
|
||||
}, future.seed = TRUE))
|
||||
# future_
|
||||
results_taxo_df <- do.call("rbind", lapply(seq_len(nrow(conditions)), function(row_idx) {
|
||||
initBM <- conditions[row_idx, "initBM"]
|
||||
is_given <- conditions[row_idx, "given"]
|
||||
is_Kmin <- conditions[row_idx, "Kmin"]
|
||||
rep <- conditions[row_idx, "rep"]
|
||||
|
||||
if (is_given) {
|
||||
the_givenclassif <- givenclassif
|
||||
} else {
|
||||
the_givenclassif <- NULL
|
||||
}
|
||||
|
||||
if (is_Kmin) {
|
||||
the_Kmin <- list(seq(1, 10), seq(1, 10))
|
||||
} else {
|
||||
the_Kmin <- list(seq(10), seq(10))
|
||||
}
|
||||
|
||||
names(the_Kmin) <- c("Tree", "Fungus")
|
||||
|
||||
fit <- estimateMultipartiteSBM(
|
||||
listSBM = list(Tree_Taxo_dist_sbm, FungusTree_sbm),
|
||||
estimOptions = list(
|
||||
"initBM" = initBM,
|
||||
"givenclassif" = the_givenclassif,
|
||||
"nbBlocksRange" = the_Kmin,
|
||||
maxVEM = 100000,
|
||||
maxVE = 100000
|
||||
)
|
||||
)
|
||||
out <- data.frame(
|
||||
InitBM = initBM,
|
||||
Kmin = is_Kmin,
|
||||
GivenClassif = is_given,
|
||||
rep = rep,
|
||||
ICL = fit$ICL,
|
||||
nbBlocksTree = fit$nbBlocks["Tree"], nbBlocksFungus = fit$nbBlocks["Fungus"]
|
||||
)
|
||||
})) # , future.seed = TRUE))
|
||||
|
||||
results_genetic_df <- do.call("rbind", lapply(seq_len(nrow(conditions)), function(row_idx) {
|
||||
initBM <- conditions[row_idx, "initBM"]
|
||||
is_given <- conditions[row_idx, "given"]
|
||||
is_Kmin <- conditions[row_idx, "Kmin"]
|
||||
rep <- conditions[row_idx, "rep"]
|
||||
|
||||
if (is_given) {
|
||||
the_givenclassif <- givenclassif
|
||||
} else {
|
||||
the_givenclassif <- NULL
|
||||
}
|
||||
|
||||
if (is_Kmin) {
|
||||
the_Kmin <- list(seq(4, 10), seq(2, 10))
|
||||
} else {
|
||||
the_Kmin <- list(seq(10), seq(10))
|
||||
}
|
||||
|
||||
names(the_Kmin) <- c("Tree", "Fungus")
|
||||
|
||||
fit <- estimateMultipartiteSBM(
|
||||
listSBM = list(Tree_Genetic_dist_sbm, FungusTree_sbm),
|
||||
estimOptions = list(
|
||||
"initBM" = initBM,
|
||||
"givenclassif" = the_givenclassif,
|
||||
"nbBlocksRange" = the_Kmin,
|
||||
maxVEM = 100000,
|
||||
maxVE = 100000
|
||||
)
|
||||
)
|
||||
out <- data.frame(
|
||||
InitBM = initBM,
|
||||
Kmin = is_Kmin,
|
||||
GivenClassif = is_given,
|
||||
rep = rep,
|
||||
ICL = fit$ICL,
|
||||
nbBlocksTree = fit$nbBlocks["Tree"], nbBlocksFungus = fit$nbBlocks["Fungus"]
|
||||
)
|
||||
})) # , future.seed = TRUE))
|
||||
|
|
|
|||
48
sge_scripts/bm_correct_sampling.sh
Executable file
48
sge_scripts/bm_correct_sampling.sh
Executable file
|
|
@ -0,0 +1,48 @@
|
|||
#!/usr/bin/env bash
|
||||
#$ -V
|
||||
#$ -cwd
|
||||
#$ -N bmcorrectsamp
|
||||
#$ -m besa
|
||||
#$ -q short.q
|
||||
#$ -t 1-6
|
||||
#$ -pe thread 64
|
||||
#$ -M louis.lacoste+migale@agroparistech.fr
|
||||
#$ -o logs/$JOB_NAME
|
||||
#$ -e logs/$JOB_NAME
|
||||
|
||||
# Creating log directory if it doesn't exists
|
||||
BASE_DIR="/home/$USER/work/human-microbiome-compendium"
|
||||
LOG_DIR=$(echo "$BASE_DIR/logs")
|
||||
|
||||
if [ ! -d "$LOG_DIR" ]; then
|
||||
mkdir -p $LOG_DIR
|
||||
fi
|
||||
|
||||
# Finding directory
|
||||
APPLICATIONS_DIR=$(echo "$BASE_DIR")
|
||||
|
||||
echo $APPLICATIONS_DIR
|
||||
|
||||
ARGID=$(($SGE_TASK_ID % 3))
|
||||
|
||||
case $ARGID in
|
||||
0)
|
||||
echo -n "Model will be Chaillou"
|
||||
MODE="chaillou"
|
||||
;;
|
||||
1)
|
||||
echo -n "Model will be Ravel"
|
||||
MODE="ravel"
|
||||
;;
|
||||
2)
|
||||
echo -n "Model will be Mach"
|
||||
MODE="mach"
|
||||
;;
|
||||
esac
|
||||
|
||||
readonly NB_CORES=$(sed -n 's/^#\$ -pe thread \(.*\)/\1/p' -- "$0")
|
||||
|
||||
|
||||
echo -n "Model will be $MODE with $NB_CORES"
|
||||
|
||||
Rscript "${APPLICATIONS_DIR}/blockmodels_correct_sampling_effort.R.R" $MODE &>> logs/$JOB_NAME.$SGE_TASK_ID
|
||||
|
|
@ -91,6 +91,7 @@ otu_table_plot
|
|||
```
|
||||
|
||||
```{r}
|
||||
#| eval: false
|
||||
# Below the graph of taxa abundance
|
||||
# otus <- per_taxa_no_names <- collapse_otu_at_taxo(phylo_data = the_data, renameOTUs = FALSE)
|
||||
otu_mat <- as(otu_table(the_data), "matrix")
|
||||
|
|
@ -170,4 +171,20 @@ kables_sd <- lapply(per_taxa_network, function(matrix) {
|
|||
kable(sd_df, row.names = FALSE)
|
||||
})
|
||||
kables_sd
|
||||
```
|
||||
|
||||
```{r}
|
||||
boxplots <- lapply(per_taxa_network, function(matrix) {
|
||||
df_mat <- stack(as.data.frame(t(matrix)))
|
||||
ggplot(df_mat, aes(y = values, x = ind, fill = ind)) +
|
||||
geom_boxplot()
|
||||
})
|
||||
boxplots[-7]
|
||||
```
|
||||
|
||||
```{r}
|
||||
seq_effort <- lapply(per_taxa_network, function(matrix) {
|
||||
colSums(matrix)
|
||||
})
|
||||
seq_effort
|
||||
```
|
||||
Loading…
Add table
Reference in a new issue