From e236b687caeb535d4a9bc1976e29c90909ada85f Mon Sep 17 00:00:00 2001
From: Louis <louis.lacoste@agroparistech.fr>
Date: Fri, 23 Jan 2026 17:03:10 +0100
Subject: [PATCH] Adding script to run benchmarks for LBM Seq

---
 benchmark_lbm_seq.R              | 153 +++++++++++++++++++++++++++++++
 sge_scripts/benchmark_lbm_seq.sh |  43 +++++++++
 2 files changed, 196 insertions(+)
 create mode 100644 benchmark_lbm_seq.R
 create mode 100644 sge_scripts/benchmark_lbm_seq.sh

diff --git a/benchmark_lbm_seq.R b/benchmark_lbm_seq.R
new file mode 100644
index 0000000..40e763d
--- /dev/null
+++ b/benchmark_lbm_seq.R
@@ -0,0 +1,153 @@
+source("utils.R")
+source("utils-bm-seq.R")
+library(biomformat)
+library(phyloseq)
+library(R.utils)
+library(stringr)
+library(sbm)
+library(blockmodels)
+library(here)
+library(microbenchmark)
+
+data_folder <- here("results", "lbm-seq")
+
+the_data <- import_biom("data/mach/kinetic.biom")
+epoch <- as.integer(Sys.time())
+
+tmp_folder <- here(data_folder, paste0("tmp", epoch))
+
+if (!dir.exists(tmp_folder)) {
+    dir.create(tmp_folder, recursive = TRUE)
+}
+
+per_taxa_networks <- collapse_otu_at_taxo(the_data)
+
+mode <- commandArgs(trailingOnly = TRUE)
+switch(mode,
+    "seq" = {
+        message("Will use SEQ")
+        r2_mbm_seq <- microbenchmark("Rank2seq" = {
+            r2_model_seq <- BM_poisson(
+                membership_type = "LBM",
+                adj = per_taxa_networks[[2]], # Account for the root
+                verbosity = 6,
+                plotting = "",
+                ncores = 1L
+            )
+            r2_model_seq$estimate()
+        }, times = 3L)
+        r3_mbm_seq <- microbenchmark("Rank3seq" = {
+            r3_model_seq <- bm_propagate_taus_all_models(phyloseq_data = the_data, rank_id_start = 2, target_rank_id = 3, per_taxa_networks = per_taxa_networks, first_model = r2_model_seq, ncores = 1)
+        }, times = 3L)
+        r4_mbm_seq <- microbenchmark("Rank4seq" = {
+            r4_model_seq <- bm_propagate_taus_all_models(phyloseq_data = the_data, rank_id_start = 3, per_taxa_networks = per_taxa_networks, first_model = r3_model_seq, ncores = 1)
+        }, times = 3L)
+        r5_mbm_seq <- microbenchmark("Rank5seq" = {
+            r5_model_seq <- bm_propagate_taus_all_models(phyloseq_data = the_data, rank_id_start = 4, per_taxa_networks = per_taxa_networks, first_model = r4_model_seq, ncores = 1)
+        }, times = 3L)
+        mbm_seq <- rbind(r2_mbm_seq, r3_mbm_seq, r4_mbm_seq, r5_mbm_seq)
+
+        out_seq <- list(
+            benchmark = mbm_seq,
+            models = list(
+                Rank2 = r2_model_seq,
+                Rank3 = r3_model_seq,
+                Rank4 = r4_model_seq,
+                Rank5 = r5_model_seq
+            )
+        )
+
+        saveRDS(out_seq, here(tmp_folder, "seq.Rds"))
+    },
+    "para" = {
+        message("Will use PARA")
+
+        r2_mbm_para <- microbenchmark("Rank2para" = {
+            r2_model_para <- BM_poisson(
+                membership_type = "LBM",
+                adj = per_taxa_networks[[2]], # Account for the root
+                verbosity = 6,
+                plotting = "",
+                ncores = parallelly::availableCores()
+            )
+            r2_model_para$estimate()
+        }, times = 3L)
+        r3_mbm_para <- microbenchmark("Rank3para" = {
+            r3_model_para <- bm_propagate_taus_all_models(phyloseq_data = the_data, rank_id_start = 2, target_rank_id = 3, per_taxa_networks = per_taxa_networks, first_model = r2_model_para, ncores = parallelly::availableCores())
+        }, times = 3L)
+        r4_mbm_para <- microbenchmark("Rank4para" = {
+            r4_model_para <- bm_propagate_taus_all_models(phyloseq_data = the_data, rank_id_start = 3, per_taxa_networks = per_taxa_networks, first_model = r3_model_para, ncores = parallelly::availableCores())
+        }, times = 3L)
+        r5_mbm_para <- microbenchmark("Rank5para" = {
+            r5_model_para <- bm_propagate_taus_all_models(phyloseq_data = the_data, rank_id_start = 4, per_taxa_networks = per_taxa_networks, first_model = r4_model_para, ncores = parallelly::availableCores())
+        }, times = 3L)
+        mbm_para <- rbind(r2_mbm_para, r3_mbm_para, r4_mbm_para, r5_mbm_para)
+
+        out_para <- list(
+            benchmark = mbm_para,
+            models = list(
+                Rank2 = r2_model_para,
+                Rank3 = r3_model_para,
+                Rank4 = r4_model_para,
+                Rank5 = r5_model_para
+            )
+        )
+        saveRDS(out_para, here(tmp_folder, "para.Rds"))
+    },
+    "notrans" = { # No transfer
+        message("Will use NO TRANSFER")
+        r2_mbm_notrans <- microbenchmark("Rank2notrans" = {
+            r2_model_notrans <- BM_poisson(
+                membership_type = "LBM",
+                adj = per_taxa_networks[[2]], # Account for the root
+                verbosity = 6,
+                plotting = "",
+                ncores = parallelly::availableCores()
+            )
+            r2_model_notrans$estimate()
+        }, times = 3L)
+        r3_mbm_notrans <- microbenchmark("Rank3notrans" = {
+            r3_model_notrans <- BM_poisson(
+                membership_type = "LBM",
+                adj = per_taxa_networks[[3]], # Account for the root
+                verbosity = 6,
+                plotting = "",
+                ncores = parallelly::availableCores()
+            )
+            r3_model_notrans$estimate()
+        }, times = 3L)
+        r4_mbm_notrans <- microbenchmark("Rank4notrans" = {
+            r4_model_notrans <- BM_poisson(
+                membership_type = "LBM",
+                adj = per_taxa_networks[[4]], # Account for the root
+                verbosity = 6,
+                plotting = "",
+                ncores = parallelly::availableCores()
+            )
+            r4_model_notrans$estimate()
+        }, times = 3L)
+        r5_mbm_notrans <- microbenchmark("Rank5notrans" = {
+            r5_model_notrans <- BM_poisson(
+                membership_type = "LBM",
+                adj = per_taxa_networks[[5]], # Account for the root
+                verbosity = 6,
+                plotting = "",
+                ncores = parallelly::availableCores()
+            )
+            r5_model_notrans$estimate()
+        }, times = 3L)
+        mbm_notrans <- rbind(r2_mbm_notrans, r3_mbm_notrans, r4_mbm_notrans, r5_mbm_notrans)
+
+        out_notrans <- list(
+            benchmark = mbm_notrans,
+            models = list(
+                Rank2 = r2_model_notrans,
+                Rank3 = r3_model_notrans,
+                Rank4 = r4_model_notrans,
+                Rank5 = r5_model_notrans
+            )
+        )
+        saveRDS(out_notrans, here(tmp_folder, "notrans.Rds"))
+    },
+    stop("Nothing selected, exiting")
+)
diff --git a/sge_scripts/benchmark_lbm_seq.sh b/sge_scripts/benchmark_lbm_seq.sh
new file mode 100644
index 0000000..78845d2
--- /dev/null
+++ b/sge_scripts/benchmark_lbm_seq.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+#$ -V
+#$ -cwd
+#$ -N increasing_size
+#$ -m besa
+#$ -q short.q
+#$ -t 1-12
+#$ -pe thread 64
+#$ -M louis.lacoste+migale@agroparistech.fr
+#$ -o logs/$JOB_NAME
+#$ -e logs/$JOB_NAME
+
+# Creating log directory if it doesn't exists
+BASE_DIR="/home/$USER/work/human-microbiome-compendium"
+LOG_DIR=$(echo "$BASE_DIR/logs")
+
+if [ ! -d "$LOG_DIR" ]; then
+    mkdir -p $LOG_DIR
+fi
+
+# Finding directory
+APPLICATIONS_DIR=$(echo "$BASE_DIR")
+
+echo $APPLICATIONS_DIR
+
+ARGID=$(($SGE_TASK_ID % 3))
+
+case $ARGID in
+    0)
+    echo -n "Model will be NOTRANS"
+    MODE="notrans"
+    ;;
+    1)
+    echo -n "Model will be SEQ"
+    MODE="seq"
+    ;;
+    2)
+    echo -n "Model will be PARA"
+    MODE="para"
+    ;;
+esac
+
+Rscript "${APPLICATIONS_DIR}/benchmark_lbm_seq.R" $MODE &>> logs/$JOB_NAME.$SGE_TASK_ID
\ No newline at end of file