Adding script to run benchmarks for LBM Seq

This commit is contained in:
Louis 2026-01-23 17:03:10 +01:00
parent df319af88d
commit e236b687ca
2 changed files with 196 additions and 0 deletions

153
benchmark_lbm_seq.R Normal file
View file

@ -0,0 +1,153 @@
source("utils.R")
source("utils-bm-seq.R")
library(biomformat)
library(phyloseq)
library(R.utils)
library(stringr)
library(sbm)
library(blockmodels)
library(here)
library(microbenchmark)
data_folder <- here("results", "lbm-seq")
the_data <- import_biom("data/mach/kinetic.biom")
epoch <- as.integer(Sys.time())
tmp_folder <- here(data_folder, paste0("tmp", epoch))
if (!dir.exists(tmp_folder)) {
dir.create(tmp_folder, recursive = TRUE)
}
per_taxa_networks <- collapse_otu_at_taxo(the_data)
mode <- commandArgs(trailingOnly = TRUE)
switch(mode,
"seq" = {
message("Will use SEQ")
r2_mbm_seq <- microbenchmark("Rank2seq" = {
r2_model_seq <- BM_poisson(
membership_type = "LBM",
adj = per_taxa_networks[[2]], # Account for the root
verbosity = 6,
plotting = "",
ncores = 1L
)
r2_model_seq$estimate()
}, times = 3L)
r3_mbm_seq <- microbenchmark("Rank3seq" = {
r3_model_seq <- bm_propagate_taus_all_models(phyloseq_data = the_data, rank_id_start = 2, target_rank_id = 3, per_taxa_networks = per_taxa_networks, first_model = r2_model_seq, ncores = 1)
}, times = 3L)
r4_mbm_seq <- microbenchmark("Rank4seq" = {
r4_model_seq <- bm_propagate_taus_all_models(phyloseq_data = the_data, rank_id_start = 3, per_taxa_networks = per_taxa_networks, first_model = r3_model_seq, ncores = 1)
}, times = 3L)
r5_mbm_seq <- microbenchmark("Rank5seq" = {
r5_model_seq <- bm_propagate_taus_all_models(phyloseq_data = the_data, rank_id_start = 4, per_taxa_networks = per_taxa_networks, first_model = r4_model_seq, ncores = 1)
}, times = 3L)
mbm_seq <- rbind(r2_mbm_seq, r3_mbm_seq, r4_mbm_seq, r5_mbm_seq)
out_seq <- list(
benchmark = mbm_seq,
models = list(
Rank2 = r2_model_seq,
Rank3 = r3_model_seq,
Rank4 = r4_model_seq,
Rank5 = r5_model_seq
)
)
saveRDS(out_seq, here(tmp_folder, "seq.Rds"))
},
"para" = {
message("Will use PARA")
r2_mbm_para <- microbenchmark("Rank2para" = {
r2_model_para <- BM_poisson(
membership_type = "LBM",
adj = per_taxa_networks[[2]], # Account for the root
verbosity = 6,
plotting = "",
ncores = parallelly::availableCores()
)
r2_model_para$estimate()
}, times = 3L)
r3_mbm_para <- microbenchmark("Rank3para" = {
r3_model_para <- bm_propagate_taus_all_models(phyloseq_data = the_data, rank_id_start = 2, target_rank_id = 3, per_taxa_networks = per_taxa_networks, first_model = r2_model_para, ncores = parallelly::availableCores())
}, times = 3L)
r4_mbm_para <- microbenchmark("Rank4para" = {
r4_model_para <- bm_propagate_taus_all_models(phyloseq_data = the_data, rank_id_start = 3, per_taxa_networks = per_taxa_networks, first_model = r3_model_para, ncores = parallelly::availableCores())
}, times = 3L)
r5_mbm_para <- microbenchmark("Rank5para" = {
r5_model_para <- bm_propagate_taus_all_models(phyloseq_data = the_data, rank_id_start = 4, per_taxa_networks = per_taxa_networks, first_model = r4_model_para, ncores = parallelly::availableCores())
}, times = 3L)
mbm_para <- rbind(r2_mbm_para, r3_mbm_para, r4_mbm_para, r5_mbm_para)
out_para <- list(
benchmark = mbm_para,
models = list(
Rank2 = r2_model_para,
Rank3 = r3_model_para,
Rank4 = r4_model_para,
Rank5 = r5_model_para
)
)
saveRDS(out_para, here(tmp_folder, "para.Rds"))
},
"notrans" = { # No transfer
message("Will use NO TRANSFER")
r2_mbm_notrans <- microbenchmark("Rank2notrans" = {
r2_model_notrans <- BM_poisson(
membership_type = "LBM",
adj = per_taxa_networks[[2]], # Account for the root
verbosity = 6,
plotting = "",
ncores = parallelly::availableCores()
)
r2_model_notrans$estimate()
}, times = 3L)
r3_mbm_notrans <- microbenchmark("Rank3notrans" = {
r3_model_notrans <- BM_poisson(
membership_type = "LBM",
adj = per_taxa_networks[[3]], # Account for the root
verbosity = 6,
plotting = "",
ncores = parallelly::availableCores()
)
r3_model_notrans$estimate()
}, times = 3L)
r4_mbm_notrans <- microbenchmark("Rank4notrans" = {
r4_model_notrans <- BM_poisson(
membership_type = "LBM",
adj = per_taxa_networks[[4]], # Account for the root
verbosity = 6,
plotting = "",
ncores = parallelly::availableCores()
)
r4_model_notrans$estimate()
}, times = 3L)
r5_mbm_notrans <- microbenchmark("Rank5notrans" = {
r5_model_notrans <- BM_poisson(
membership_type = "LBM",
adj = per_taxa_networks[[5]], # Account for the root
verbosity = 6,
plotting = "",
ncores = parallelly::availableCores()
)
r5_model_notrans$estimate()
}, times = 3L)
mbm_notrans <- rbind(r2_mbm_notrans, r3_mbm_notrans, r4_mbm_notrans, r5_mbm_notrans)
out_notrans <- list(
benchmark = mbm_notrans,
models = list(
Rank2 = r2_model_notrans,
Rank3 = r3_model_notrans,
Rank4 = r4_model_notrans,
Rank5 = r5_model_notrans
)
)
saveRDS(out_notrans, here(tmp_folder, "notrans.Rds"))
},
stop("Nothing selected, exiting")
)

View file

@ -0,0 +1,43 @@
#!/usr/bin/env bash
#$ -V
#$ -cwd
#$ -N increasing_size
#$ -m besa
#$ -q short.q
#$ -t 1-12
#$ -pe thread 64
#$ -M louis.lacoste+migale@agroparistech.fr
#$ -o logs/$JOB_NAME
#$ -e logs/$JOB_NAME
# Creating log directory if it doesn't exists
BASE_DIR="/home/$USER/work/human-microbiome-compendium"
LOG_DIR=$(echo "$BASE_DIR/logs")
if [ ! -d "$LOG_DIR" ]; then
mkdir -p $LOG_DIR
fi
# Finding directory
APPLICATIONS_DIR=$(echo "$BASE_DIR")
echo $APPLICATIONS_DIR
ARGID=$(($SGE_TASK_ID % 3))
case $ARGID in
0)
echo -n "Model will be NOTRANS"
MODE="notrans"
;;
1)
echo -n "Model will be SEQ"
MODE="seq"
;;
2)
echo -n "Model will be PARA"
MODE="para"
;;
esac
Rscript "${APPLICATIONS_DIR}/benchmark_lbm_seq.R" $MODE &>> logs/$JOB_NAME.$SGE_TASK_ID