From fccfe90b3cc1eeea3f10055998ad21e2f7c93f57 Mon Sep 17 00:00:00 2001
From: Louis Lacoste
Date: Fri, 7 Jun 2024 15:44:43 +0200
Subject: [PATCH] Adding smaller clustering

---
 .../01_dore_no_small_clusterize.R             | 148 ++++++++++++++++++
 .../migale_application_dore_no_small.sh       |  37 +++++
 2 files changed, 185 insertions(+)
 create mode 100644 code/applications/dore_no_small/01_dore_no_small_clusterize.R
 create mode 100755 code/scripts/migale_application_dore_no_small.sh

diff --git a/code/applications/dore_no_small/01_dore_no_small_clusterize.R b/code/applications/dore_no_small/01_dore_no_small_clusterize.R
new file mode 100644
index 0000000..42b7814
--- /dev/null
+++ b/code/applications/dore_no_small/01_dore_no_small_clusterize.R
@@ -0,0 +1,148 @@
+library(colSBM)
+library(dplyr)
+library(tidyr)
+library(here)
+
+# Clusters the Doré et al. networks with colSBM after discarding the networks
+# that are not larger than `min_size`, then saves the result as an Rds file.
+#
+# Usage: Rscript 01_dore_no_small_clusterize.R [--model MODEL] [--seed SEED]
+#   --model  one of "iid", "pi", "rho", "pirho" (default: "iid")
+#   --seed   integer seed for the random number generator (default: 1234)
+
+# Arguments
+arg <- commandArgs(trailingOnly = TRUE)
+
+model <- "iid"
+seed <- 1234L
+
+if (length(arg) == 0L) {
+  message("No arguments provided, using default.")
+} else {
+  if ("--model" %in% arg) {
+    model <- arg[(which(arg == "--model") + 1L)]
+  } else {
+    message("No model provided, defaulting to iid.")
+  }
+  if ("--seed" %in% arg) {
+    # as.integer() never raises an error on a bad string: it returns NA with a
+    # warning. The NA is rejected by the checks below.
+    seed <- suppressWarnings(as.integer(arg[(which(arg == "--seed") + 1L)]))
+  } else {
+    message("No seed provided, defaulting to 1234.")
+  }
+}
+
+# Arguments checks
+# A flag given without a value yields NA and a repeated flag yields several
+# values; both cases are rejected here.
+allowed_model <- c("iid", "pi", "rho", "pirho")
+stopifnot(
+  "Unknown model, should be : iid, pi, rho or pirho" =
+    (length(model) == 1L && model %in% allowed_model),
+  "Seed isn't castable to integer" =
+    (length(seed) == 1L && is.integer(seed) && !is.na(seed))
+)
+
+message(
+  "Début du clustering des données Doré et al. sans les petits réseaux avec le modèle ", model,
+  " et la seed ", seed
+)
+
+set.seed(seed, "L'Ecuyer-CMRG")
+base_data_folder <- file.path(here(), "code", "data", "dore")
+save_folder <- file.path(here(), "code", "results", "applications", "dore_no_small")
+
+if (!dir.exists(save_folder)) {
+  dir.create(save_folder, recursive = TRUE)
+}
+
+interaction_data <- read.table(
+  file = file.path(base_data_folder, "interaction-data.txt"),
+  sep = "\t", header = TRUE
+)
+
+seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
+# Name of each aggregated network: the names of its webs joined by "+"
+names_aggreg_networks <- vapply(
+  seq_ids_network_aggreg,
+  function(id) {
+    paste0(
+      unique(interaction_data[which(interaction_data$id_network_aggreg == id), ]$web),
+      collapse = "+"
+    )
+  },
+  character(1L)
+)
+# Computation of incidence matrices
+# One binary matrix per aggregated network: plants in rows, insects in columns
+incidence_matrices <- lapply(
+  seq_ids_network_aggreg,
+  function(m) {
+    current_interaction_data <- interaction_data[which(interaction_data$id_network_aggreg == m), ] %>%
+      mutate(
+        plantaggreg = paste(plantorder,
+          plantfamily, plantgenus, plantspecies,
+          sep = "-"
+        ),
+        insectaggreg = paste(insectorder,
+          insectfamily, insectgenus, insectspecies,
+          sep = "-"
+        )
+      )
+    interaction_counts <- table(current_interaction_data$plantaggreg, current_interaction_data$insectaggreg)
+
+    current_incidence_matrix <- matrix(interaction_counts,
+      ncol = ncol(interaction_counts), dimnames = dimnames(interaction_counts)
+    )
+
+    # Only the presence of an interaction is kept, not its count
+    current_incidence_matrix[which(current_incidence_matrix > 0)] <- 1
+    return(current_incidence_matrix)
+  }
+)
+
+names(incidence_matrices) <- names_aggreg_networks
+
+# Minimal size of the kept networks: c(rows, columns), both bounds are strict
+min_size <- c(25L, 50L)
+
+# vapply() keeps working when the list is empty, where sapply() would not
+# return a matrix.
+is_large_enough <- vapply(
+  incidence_matrices,
+  function(incidence_matrix) all(dim(incidence_matrix) > min_size),
+  logical(1L)
+)
+incidence_matrices <- incidence_matrices[is_large_enough]
+stopifnot(
+  "No network is larger than min_size" = (length(incidence_matrices) > 0L)
+)
+
+start_time <- format(Sys.time(), "%d-%m-%y_%H-%M-%S")
+
+list_collection <- clusterize_bipartite_networks(
+  netlist = incidence_matrices,
+  net_id = names(incidence_matrices),
+  colsbm_model = model,
+  nb_run = 3L,
+  global_opts = list(
+    nb_cores = parallelly::availableCores(omit = 1L),
+    plot_details = 0L,
+    backend = "parallel"
+  )
+)
+
+save_file <- file.path(
+  save_folder, paste0(
+    "dore_collection_no_small_",
+    model, "_seed_", seed,
+    "_", start_time, ".Rds"
+  )
+)
+
+saveRDS(list(min_size = min_size, clustering = list_collection),
+  file = save_file
+)
+# Printed only once the file has actually been written
+message("Clustering saved.")
diff --git a/code/scripts/migale_application_dore_no_small.sh b/code/scripts/migale_application_dore_no_small.sh
new file mode 100755
index 0000000..f117fc7
--- /dev/null
+++ b/code/scripts/migale_application_dore_no_small.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+#$ -V
+#$ -cwd
+#$ -N Dore_app
+#$ -m besa
+#$ -t 1:4
+#$ -q long.q
+#$ -pe thread 64
+#$ -M louis.lacoste+migale@agroparistech.fr
+#$ -o logs/$JOB_NAME.$TASK_ID
+#$ -e logs/$JOB_NAME.$TASK_ID
+
+# Creating log directory if it doesn't exist
+# NOTE(review): the -o/-e log files above are presumably opened before this
+# script runs, so logs/ may have to exist at submission time -- confirm.
+BASE_DIR="/home/$USER/work/mia-stage-2024"
+LOG_DIR="$BASE_DIR/logs"
+
+if [ ! -d "$LOG_DIR" ]; then
+    mkdir -p "$LOG_DIR"
+fi
+
+# Constant data
+# One model per array task: task ids 1 to 4 (see -t above) index MODELARRAY
+MODELARRAY=("iid" "pi" "rho" "pirho")
+ID=$((SGE_TASK_ID - 1))
+MODEL=${MODELARRAY[$((ID % 4))]}
+
+SEED=0
+
+
+# Finding directory
+APPLICATIONS_DIR="$BASE_DIR/code/applications"
+
+echo "$APPLICATIONS_DIR"
+
+Rscript "${APPLICATIONS_DIR}/dore_no_small/01_dore_no_small_clusterize.R" --model "$MODEL" --seed "$SEED"