Adding script to run benchmarks for LBM Seq

2026-01-23 17:03:10 +01:00 · 2026-01-23 17:03:10 +01:00 · e236b687ca
commit e236b687ca
parent df319af88d
2 changed files with 196 additions and 0 deletions
--- a/benchmark_lbm_seq.R
+++ b/benchmark_lbm_seq.R
@ -0,0 +1,153 @@
 source("utils.R")
 source("utils-bm-seq.R")
 library(biomformat)
 library(phyloseq)
 library(R.utils)
 library(stringr)
 library(sbm)
 library(blockmodels)
 library(here)
 library(microbenchmark)
 # Benchmark mode, read from the first (and only) command-line argument.
 # It is validated before anything else so that a missing or misspelled mode
 # fails immediately, without importing the data or creating an empty
 # output folder.
 mode <- commandArgs(trailingOnly = TRUE)
 valid_modes <- c("seq", "para", "notrans")
 if (length(mode) != 1L || !mode %in% valid_modes) {
     stop(
         "Nothing selected, exiting (expected exactly one mode among: ",
         paste(valid_modes, collapse = ", "), ")",
         call. = FALSE
     )
 }
 data_folder <- here("results", "lbm-seq")
 the_data <- import_biom("data/mach/kinetic.biom")
 epoch <- as.integer(Sys.time())
 # Several array tasks running the same mode can start within the same
 # second; tagging the folder with the SGE task id (or the process id when
 # the script is run outside an array job) keeps them from overwriting each
 # other's "<mode>.Rds".
 task_tag <- Sys.getenv("SGE_TASK_ID", unset = "")
 if (!grepl("^[0-9]+$", task_tag)) {
     task_tag <- as.character(Sys.getpid())
 }
 tmp_folder <- here(data_folder, paste0("tmp", epoch, "-", task_tag))
 # `showWarnings = FALSE` makes this a no-op when the folder already exists.
 dir.create(tmp_folder, recursive = TRUE, showWarnings = FALSE)
 # One network per taxonomic rank; indexed by rank further down the script.
 per_taxa_networks <- collapse_otu_at_taxo(the_data)
 # Number of repetitions of every timed step.
 n_repeats <- 3L
 # Build a Poisson LBM on the adjacency matrix `adj`, estimate it using
 # `ncores` cores and return the model object.
 fit_rank_lbm <- function(adj, ncores) {
     model <- BM_poisson(
         membership_type = "LBM",
         adj = adj,
         verbosity = 6,
         plotting = "",
         ncores = ncores
     )
     model$estimate()
     model
 }
 # Propagate from `first_model`, starting at rank `rank_id_start`.
 # Extra named arguments (e.g. `target_rank_id`) are forwarded unchanged to
 # bm_propagate_taus_all_models().
 propagate_rank <- function(first_model, rank_id_start, ncores, ...) {
     bm_propagate_taus_all_models(
         phyloseq_data = the_data,
         rank_id_start = rank_id_start,
         ...,
         per_taxa_networks = per_taxa_networks,
         first_model = first_model,
         ncores = ncores
     )
 }
 # Time `run_step()` `n_repeats` times under the benchmark label `label`.
 # Returns list(timing = <microbenchmark result>, result = <value returned
 # by the last repetition>).
 time_step <- function(label, run_step) {
     force(run_step) # build the closure outside of the timed region
     last_value <- NULL
     exprs <- list(quote(last_value <- run_step()))
     names(exprs) <- label
     # microbenchmark evaluates its expressions in the calling frame, so
     # `last_value` is assigned here, in this function's environment.
     timing <- microbenchmark(list = exprs, times = n_repeats)
     list(timing = timing, result = last_value)
 }
 # Benchmark the rank 2 fit followed by the propagation to ranks 3, 4 and 5.
 # `suffix` is appended to the benchmark labels ("Rank2<suffix>", ...).
 # Returns list(benchmark = <stacked timings>, models = <one entry per rank>).
 run_transfer_benchmark <- function(suffix, ncores) {
     step_label <- function(rank) paste0("Rank", rank, suffix)
     rank2 <- time_step(step_label(2), function() {
         fit_rank_lbm(per_taxa_networks[[2]], ncores) # Account for the root
     })
     rank3 <- time_step(step_label(3), function() {
         propagate_rank(
             rank2$result,
             rank_id_start = 2,
             ncores = ncores,
             target_rank_id = 3
         )
     })
     rank4 <- time_step(step_label(4), function() {
         propagate_rank(rank3$result, rank_id_start = 3, ncores = ncores)
     })
     rank5 <- time_step(step_label(5), function() {
         propagate_rank(rank4$result, rank_id_start = 4, ncores = ncores)
     })
     list(
         benchmark = rbind(
             rank2$timing, rank3$timing, rank4$timing, rank5$timing
         ),
         models = list(
             Rank2 = rank2$result,
             Rank3 = rank3$result,
             Rank4 = rank4$result,
             Rank5 = rank5$result
         )
     )
 }
 # Benchmark an independent fit (no transfer between ranks) on the networks
 # at indices 2 to 5 of `per_taxa_networks`.
 # Returns the same list structure as run_transfer_benchmark().
 run_notrans_benchmark <- function(ncores) {
     ranks <- 2:5
     steps <- lapply(ranks, function(rank) {
         time_step(paste0("Rank", rank, "notrans"), function() {
             fit_rank_lbm(per_taxa_networks[[rank]], ncores)
         })
     })
     names(steps) <- paste0("Rank", ranks)
     list(
         # unname(): named arguments would alter the row names built by rbind
         benchmark = do.call(rbind, unname(lapply(steps, `[[`, "timing"))),
         models = lapply(steps, `[[`, "result")
     )
 }
 # Run the benchmark selected on the command line and save its timings and
 # fitted models as "<mode>.Rds" in `tmp_folder`.
 switch(mode,
     "seq" = {
         message("Will use SEQ")
         out_seq <- run_transfer_benchmark("seq", ncores = 1L)
         saveRDS(out_seq, here(tmp_folder, "seq.Rds"))
     },
     "para" = {
         message("Will use PARA")
         out_para <- run_transfer_benchmark(
             "para",
             ncores = parallelly::availableCores()
         )
         saveRDS(out_para, here(tmp_folder, "para.Rds"))
     },
     "notrans" = { # No transfer
         message("Will use NO TRANSFER")
         out_notrans <- run_notrans_benchmark(
             ncores = parallelly::availableCores()
         )
         saveRDS(out_notrans, here(tmp_folder, "notrans.Rds"))
     },
     stop("Nothing selected, exiting")
 )
--- a/sge_scripts/benchmark_lbm_seq.sh
+++ b/sge_scripts/benchmark_lbm_seq.sh
@ -0,0 +1,43 @@
 #!/usr/bin/env bash
 #$ -V
 #$ -cwd
 #$ -N increasing_size
 #$ -m besa
 #$ -q short.q
 #$ -t 1-12
 #$ -pe thread 64
 #$ -M louis.lacoste+migale@agroparistech.fr
 #$ -o logs/$JOB_NAME
 #$ -e logs/$JOB_NAME
 # SGE array job running benchmark_lbm_seq.R. The task id modulo 3 selects
 # the mode (0: notrans, 1: seq, 2: para), so tasks 1-12 run each mode 4 times.
 # NOTE(review): the job name "increasing_size" looks inherited from another
 # script; it is left unchanged because it determines the log file names.
 # Creating log directory if it doesn't exist
 BASE_DIR="/home/$USER/work/human-microbiome-compendium"
 LOG_DIR="$BASE_DIR/logs"
 mkdir -p "$LOG_DIR"
 # Finding directory
 APPLICATIONS_DIR="$BASE_DIR"
 echo "$APPLICATIONS_DIR"
 # Outside of an array job SGE_TASK_ID is unset or not a number; refuse to
 # run instead of silently picking a mode (or none at all).
 if ! [[ "${SGE_TASK_ID:-}" =~ ^[0-9]+$ ]]; then
    echo "SGE_TASK_ID must be a task number (got '${SGE_TASK_ID:-}'); submit this script as an array job" >&2
    exit 1
 fi
 # 10# forces base 10 so that an id with a leading zero is not read as octal
 ARGID=$((10#$SGE_TASK_ID % 3))
 case $ARGID in
    0)
    echo -n "Model will be NOTRANS"
    MODE="notrans"
    ;;
    1)
    echo -n "Model will be SEQ"
    MODE="seq"
    ;;
    2)
    echo -n "Model will be PARA"
    MODE="para"
    ;;
    *)
    echo "Unexpected mode index '$ARGID'" >&2
    exit 1
    ;;
 esac
 # Log into the directory created above rather than a relative "logs/"
 Rscript "${APPLICATIONS_DIR}/benchmark_lbm_seq.R" "$MODE" &>> "$LOG_DIR/$JOB_NAME.$SGE_TASK_ID"