From e236b687caeb535d4a9bc1976e29c90909ada85f Mon Sep 17 00:00:00 2001
From: Louis
Date: Fri, 23 Jan 2026 17:03:10 +0100
Subject: [PATCH] Adding script to run benchmarks for LBM Seq

---
 benchmark_lbm_seq.R              | 233 +++++++++++++++++++++++++++++++
 sge_scripts/benchmark_lbm_seq.sh |  59 ++++++++
 2 files changed, 292 insertions(+)
 create mode 100644 benchmark_lbm_seq.R
 create mode 100644 sge_scripts/benchmark_lbm_seq.sh

diff --git a/benchmark_lbm_seq.R b/benchmark_lbm_seq.R
new file mode 100644
index 0000000..40e763d
--- /dev/null
+++ b/benchmark_lbm_seq.R
@@ -0,0 +1,233 @@
+# Benchmark LBM fits on the per-rank networks of the kinetic dataset.
+#
+# Usage: Rscript benchmark_lbm_seq.R <mode>
+#
+# <mode> selects what is timed for ranks 2 to 5:
+#   "seq"     - rank 2 fitted with BM_poisson(), ranks 3 to 5 obtained with
+#               bm_propagate_taus_all_models(), always with a single core
+#   "para"    - same as "seq" with parallelly::availableCores() cores
+#   "notrans" - every rank fitted from scratch with BM_poisson(), i.e. no
+#               transfer between ranks
+#
+# Every expression is timed three times with microbenchmark(). The timings
+# and the fitted models are saved as <mode>.Rds in a new folder
+# results/lbm-seq/tmp<epoch>[_task<SGE_TASK_ID>].
+source("utils.R")
+source("utils-bm-seq.R")
+library(biomformat)
+library(phyloseq)
+library(R.utils)
+library(stringr)
+library(sbm)
+library(blockmodels)
+library(here)
+library(microbenchmark)
+
+# Validate the command line before any expensive work: switch() needs an
+# EXPR of length one, which commandArgs() does not guarantee.
+valid_modes <- c("seq", "para", "notrans")
+mode <- commandArgs(trailingOnly = TRUE)
+if (length(mode) != 1L || !mode %in% valid_modes) {
+  stop(
+    "Nothing selected, exiting. Expected exactly one mode among: ",
+    paste(valid_modes, collapse = ", "),
+    call. = FALSE
+  )
+}
+
+data_folder <- here("results", "lbm-seq")
+
+the_data <- import_biom("data/mach/kinetic.biom")
+epoch <- as.integer(Sys.time())
+
+# Array tasks started within the same second share `epoch`. Appending the
+# SGE task id (when there is one) keeps two tasks running the same mode
+# from overwriting each other's <mode>.Rds.
+task_id <- Sys.getenv("SGE_TASK_ID", unset = "")
+run_id <- if (nzchar(task_id) && task_id != "undefined") {
+  paste0(epoch, "_task", task_id)
+} else {
+  as.character(epoch)
+}
+tmp_folder <- here(data_folder, paste0("tmp", run_id))
+
+if (!dir.exists(tmp_folder)) {
+  dir.create(tmp_folder, recursive = TRUE)
+}
+
+per_taxa_networks <- collapse_otu_at_taxo(the_data)
+
+# The models are assigned inside the timed expressions and reused afterwards
+# (as first_model of the next rank and in the saved output), so the
+# microbenchmark() calls must stay in rank order.
+switch(mode,
+  "seq" = {
+    message("Will use SEQ")
+    r2_mbm_seq <- microbenchmark("Rank2seq" = {
+      r2_model_seq <- BM_poisson(
+        membership_type = "LBM",
+        adj = per_taxa_networks[[2]], # Account for the root
+        verbosity = 6,
+        plotting = "",
+        ncores = 1L
+      )
+      r2_model_seq$estimate()
+    }, times = 3L)
+    r3_mbm_seq <- microbenchmark("Rank3seq" = {
+      r3_model_seq <- bm_propagate_taus_all_models(
+        phyloseq_data = the_data,
+        rank_id_start = 2,
+        target_rank_id = 3,
+        per_taxa_networks = per_taxa_networks,
+        first_model = r2_model_seq,
+        ncores = 1
+      )
+    }, times = 3L)
+    r4_mbm_seq <- microbenchmark("Rank4seq" = {
+      r4_model_seq <- bm_propagate_taus_all_models(
+        phyloseq_data = the_data,
+        rank_id_start = 3,
+        per_taxa_networks = per_taxa_networks,
+        first_model = r3_model_seq,
+        ncores = 1
+      )
+    }, times = 3L)
+    r5_mbm_seq <- microbenchmark("Rank5seq" = {
+      r5_model_seq <- bm_propagate_taus_all_models(
+        phyloseq_data = the_data,
+        rank_id_start = 4,
+        per_taxa_networks = per_taxa_networks,
+        first_model = r4_model_seq,
+        ncores = 1
+      )
+    }, times = 3L)
+    mbm_seq <- rbind(r2_mbm_seq, r3_mbm_seq, r4_mbm_seq, r5_mbm_seq)
+
+    out_seq <- list(
+      benchmark = mbm_seq,
+      models = list(
+        Rank2 = r2_model_seq,
+        Rank3 = r3_model_seq,
+        Rank4 = r4_model_seq,
+        Rank5 = r5_model_seq
+      )
+    )
+
+    saveRDS(out_seq, here(tmp_folder, "seq.Rds"))
+  },
+  "para" = {
+    message("Will use PARA")
+    n_cores <- parallelly::availableCores()
+
+    r2_mbm_para <- microbenchmark("Rank2para" = {
+      r2_model_para <- BM_poisson(
+        membership_type = "LBM",
+        adj = per_taxa_networks[[2]], # Account for the root
+        verbosity = 6,
+        plotting = "",
+        ncores = n_cores
+      )
+      r2_model_para$estimate()
+    }, times = 3L)
+    r3_mbm_para <- microbenchmark("Rank3para" = {
+      r3_model_para <- bm_propagate_taus_all_models(
+        phyloseq_data = the_data,
+        rank_id_start = 2,
+        target_rank_id = 3,
+        per_taxa_networks = per_taxa_networks,
+        first_model = r2_model_para,
+        ncores = n_cores
+      )
+    }, times = 3L)
+    r4_mbm_para <- microbenchmark("Rank4para" = {
+      r4_model_para <- bm_propagate_taus_all_models(
+        phyloseq_data = the_data,
+        rank_id_start = 3,
+        per_taxa_networks = per_taxa_networks,
+        first_model = r3_model_para,
+        ncores = n_cores
+      )
+    }, times = 3L)
+    r5_mbm_para <- microbenchmark("Rank5para" = {
+      r5_model_para <- bm_propagate_taus_all_models(
+        phyloseq_data = the_data,
+        rank_id_start = 4,
+        per_taxa_networks = per_taxa_networks,
+        first_model = r4_model_para,
+        ncores = n_cores
+      )
+    }, times = 3L)
+    mbm_para <- rbind(r2_mbm_para, r3_mbm_para, r4_mbm_para, r5_mbm_para)
+
+    out_para <- list(
+      benchmark = mbm_para,
+      models = list(
+        Rank2 = r2_model_para,
+        Rank3 = r3_model_para,
+        Rank4 = r4_model_para,
+        Rank5 = r5_model_para
+      )
+    )
+    saveRDS(out_para, here(tmp_folder, "para.Rds"))
+  },
+  "notrans" = { # No transfer
+    message("Will use NO TRANSFER")
+    n_cores <- parallelly::availableCores()
+
+    r2_mbm_notrans <- microbenchmark("Rank2notrans" = {
+      r2_model_notrans <- BM_poisson(
+        membership_type = "LBM",
+        adj = per_taxa_networks[[2]], # Account for the root
+        verbosity = 6,
+        plotting = "",
+        ncores = n_cores
+      )
+      r2_model_notrans$estimate()
+    }, times = 3L)
+    r3_mbm_notrans <- microbenchmark("Rank3notrans" = {
+      r3_model_notrans <- BM_poisson(
+        membership_type = "LBM",
+        adj = per_taxa_networks[[3]], # Account for the root
+        verbosity = 6,
+        plotting = "",
+        ncores = n_cores
+      )
+      r3_model_notrans$estimate()
+    }, times = 3L)
+    r4_mbm_notrans <- microbenchmark("Rank4notrans" = {
+      r4_model_notrans <- BM_poisson(
+        membership_type = "LBM",
+        adj = per_taxa_networks[[4]], # Account for the root
+        verbosity = 6,
+        plotting = "",
+        ncores = n_cores
+      )
+      r4_model_notrans$estimate()
+    }, times = 3L)
+    r5_mbm_notrans <- microbenchmark("Rank5notrans" = {
+      r5_model_notrans <- BM_poisson(
+        membership_type = "LBM",
+        adj = per_taxa_networks[[5]], # Account for the root
+        verbosity = 6,
+        plotting = "",
+        ncores = n_cores
+      )
+      r5_model_notrans$estimate()
+    }, times = 3L)
+    mbm_notrans <- rbind(
+      r2_mbm_notrans, r3_mbm_notrans, r4_mbm_notrans, r5_mbm_notrans
+    )
+
+    out_notrans <- list(
+      benchmark = mbm_notrans,
+      models = list(
+        Rank2 = r2_model_notrans,
+        Rank3 = r3_model_notrans,
+        Rank4 = r4_model_notrans,
+        Rank5 = r5_model_notrans
+      )
+    )
+    saveRDS(out_notrans, here(tmp_folder, "notrans.Rds"))
+  },
+  stop("Nothing selected, exiting")
+)
diff --git a/sge_scripts/benchmark_lbm_seq.sh b/sge_scripts/benchmark_lbm_seq.sh
new file mode 100644
index 0000000..78845d2
--- /dev/null
+++ b/sge_scripts/benchmark_lbm_seq.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+#$ -V
+#$ -cwd
+#$ -N increasing_size
+#$ -m besa
+#$ -q short.q
+#$ -t 1-12
+#$ -pe thread 64
+#$ -M louis.lacoste+migale@agroparistech.fr
+#$ -o logs/$JOB_NAME
+#$ -e logs/$JOB_NAME
+
+# Runs benchmark_lbm_seq.R as a 12-task array job (see -t above). The mode
+# passed to the R script is picked from SGE_TASK_ID modulo 3, so each of the
+# three modes is run by four tasks.
+#
+# NOTE(review): the job name is "increasing_size" although this script runs
+# the LBM Seq benchmark; it also names the log files - confirm it is intended.
+# NOTE(review): the -o/-e paths above are presumably opened by the scheduler
+# before the mkdir below runs - confirm logs/ exists at submission time.
+
+BASE_DIR="/home/$USER/work/human-microbiome-compendium"
+LOG_DIR="$BASE_DIR/logs"
+
+# Creating log directory if it doesn't exist
+mkdir -p "$LOG_DIR"
+
+# Finding directory
+APPLICATIONS_DIR="$BASE_DIR"
+
+echo "$APPLICATIONS_DIR"
+
+# benchmark_lbm_seq.R sources utils.R and reads data/ through relative
+# paths, so it has to be started from the project directory.
+cd "$APPLICATIONS_DIR" || exit 1
+
+ARGID=$(($SGE_TASK_ID % 3))
+
+case $ARGID in
+    0)
+        echo -n "Model will be NOTRANS"
+        MODE="notrans"
+        ;;
+    1)
+        echo -n "Model will be SEQ"
+        MODE="seq"
+        ;;
+    2)
+        echo -n "Model will be PARA"
+        MODE="para"
+        ;;
+    *)
+        echo "Cannot pick a mode from SGE_TASK_ID='$SGE_TASK_ID'" >&2
+        exit 1
+        ;;
+esac
+
+Rscript "${APPLICATIONS_DIR}/benchmark_lbm_seq.R" "$MODE" \
+    &>> "$LOG_DIR/$JOB_NAME.$SGE_TASK_ID"