Adding smaller clustering

This commit is contained in:
Louis Lacoste 2024-06-07 15:44:43 +02:00
parent b24418ebee
commit fccfe90b3c
2 changed files with 157 additions and 0 deletions

View file

@ -0,0 +1,123 @@
library(colSBM)
library(dplyr)
library(tidyr)
library(here)
# Arguments ----
# Optional command-line flags:
#   --model <iid|pi|rho|pirho>  (default "iid")
#   --seed  <integer>           (default 1234)

# Return the token that follows the first occurrence of `flag` in `args`,
# or NA_character_ when the flag is absent or is the last token.
get_flag_value <- function(args, flag) {
  position <- match(flag, args)
  if (is.na(position) || position == length(args)) {
    return(NA_character_)
  }
  args[[position + 1L]]
}

arg <- commandArgs(trailingOnly = TRUE)
model <- "iid"
seed <- 1234L
if (length(arg) == 0L) {
  message("No arguments provided, using default.")
} else {
  if ("--model" %in% arg) {
    model <- get_flag_value(arg, "--model")
  } else {
    message("No model provided, defaulting to iid.")
  }
  if ("--seed" %in% arg) {
    # as.integer() does not raise an error on a non-numeric string: it returns
    # NA (with a warning). The NA is rejected by the checks below.
    seed <- as.integer(get_flag_value(arg, "--seed"))
  } else {
    message("No seed provided, defaulting to 1234.")
  }
}

# Arguments checks ----
allowed_model <- c("iid", "pi", "rho", "pirho")
stopifnot(
  "Unknown model, should be : iid, pi, rho or pirho" = (model %in% allowed_model),
  # is.integer(NA_integer_) is TRUE, so a failed cast must be detected with
  # is.na() explicitly.
  "Seed isn't castable to integer" = (is.integer(seed) && !is.na(seed))
)
message(
  "Début du clustering des données Doré et al. sans les petits réseaux avec le modèle ", model,
  " et la seed ", seed
)
# Reproducibility ----
# Seed the random number generator, selecting the "L'Ecuyer-CMRG" kind.
set.seed(seed, kind = "L'Ecuyer-CMRG")

# Input and output locations ----
# Both folders live under "<project root>/code", the root being given by here().
code_root <- file.path(here(), "code")
base_data_folder <- file.path(code_root, "data", "dore")
save_folder <- file.path(
  code_root, "results", "applications", "dore_no_small"
)
if (!dir.exists(save_folder)) {
  dir.create(save_folder, recursive = TRUE)
}

# Path to the serialized matrices (only the path is built here; the file is
# not read in this part of the script).
collection_data <- file.path(base_data_folder, "dore-matrices.Rds")

# Raw interaction table: tab-separated text file with a header row.
interaction_data <- read.table(
  file = file.path(base_data_folder, "interaction-data.txt"),
  sep = "\t",
  header = TRUE
)

# Distinct aggregated-network identifiers, in order of first appearance.
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
# Build one label per aggregated network: the distinct `web` values of the
# rows belonging to that network, joined with "+".
# vapply() guarantees a character vector whatever the number of networks
# (sapply() would return an empty list when there is no network at all).
names_aggreg_networks <- vapply(
  seq_ids_network_aggreg,
  function(id) {
    paste0(
      unique(interaction_data[which(interaction_data$id_network_aggreg == id), ]$web),
      collapse = "+"
    )
  },
  FUN.VALUE = character(1L)
)
# Computation of incidence matrices
# For every aggregated network id, build a binary matrix whose rows are the
# plant labels and whose columns are the insect labels; a cell is 1 when at
# least one interaction row links the two, 0 otherwise.
incidence_matrices <- lapply(
  seq_ids_network_aggreg,
  function(network_id) {
    network_rows <- which(interaction_data$id_network_aggreg == network_id)
    # Label each taxon as "order-family-genus-species".
    labelled_interactions <- interaction_data[network_rows, ] %>%
      mutate(
        plantaggreg = paste(
          plantorder, plantfamily, plantgenus, plantspecies,
          sep = "-"
        ),
        insectaggreg = paste(
          insectorder, insectfamily, insectgenus, insectspecies,
          sep = "-"
        )
      )
    # Cross-tabulate plants (rows) against insects (columns).
    interaction_counts <- table(
      labelled_interactions$plantaggreg,
      labelled_interactions$insectaggreg
    )
    # Convert the table into a plain matrix, keeping its dimnames.
    presence_matrix <- matrix(
      interaction_counts,
      ncol = ncol(interaction_counts),
      dimnames = dimnames(interaction_counts)
    )
    # Collapse counts to presence/absence.
    presence_matrix[presence_matrix > 0] <- 1
    presence_matrix
  }
)
names(incidence_matrices) <- names_aggreg_networks

# Size thresholds as c(rows, columns). A network is kept only when BOTH of its
# dimensions are strictly greater than the corresponding threshold.
min_size <- c(25L, 50L)

# Explicit per-matrix predicate: does not rely on sapply() simplification nor
# on recycling `min_size` over a 2 x n matrix, and also works on an empty list.
keep_network <- vapply(
  incidence_matrices,
  function(incidence_matrix) {
    nrow(incidence_matrix) > min_size[1L] &&
      ncol(incidence_matrix) > min_size[2L]
  },
  FUN.VALUE = logical(1L)
)
incidence_matrices <- incidence_matrices[keep_network]
# Clustering ----
# Timestamp taken before the clustering starts; it is embedded in the name of
# the output file.
start_time <- format(Sys.time(), "%d-%m-%y_%H-%M-%S")

# Cluster the retained networks with the requested colSBM model.
# nb_cores is the value returned by parallelly::availableCores(omit = 1L).
list_collection <- clusterize_bipartite_networks(
  netlist = incidence_matrices,
  net_id = names(incidence_matrices),
  colsbm_model = model,
  nb_run = 3L,
  global_opts = list(
    nb_cores = parallelly::availableCores(omit = 1L),
    plot_details = 0L,
    backend = "parallel"
  )
)

# Saving ----
# Output file: dore_collection_no_small_<model>_seed_<seed>_<start_time>.Rds
save_file <- file.path(
  save_folder,
  paste0(
    "dore_collection_no_small_",
    model, "_seed_", seed,
    "_", start_time, ".Rds"
  )
)
# The size thresholds are stored next to the clustering result.
saveRDS(
  list(min_size = min_size, clustering = list_collection),
  file = save_file
)
# Report success only once the file has actually been written.
message("Clustering saved.")

View file

@ -0,0 +1,34 @@
#!/usr/bin/env bash
#$ -V
#$ -cwd
#$ -N Dore_app
#$ -m besa
#$ -t 1:4
#$ -q long.q
#$ -pe thread 64
#$ -M louis.lacoste+migale@agroparistech.fr
#$ -o logs/$JOB_NAME.$TASK_ID
#$ -e logs/$JOB_NAME.$TASK_ID
# Create the log directory if it doesn't exist (mkdir -p is a no-op when the
# directory is already there).
# NOTE(review): the "-o"/"-e" directives above point to a relative "logs/"
# path; presumably the scheduler opens those files before this script runs, so
# that directory may need to exist at submission time. Confirm.
BASE_DIR="/home/$USER/work/mia-stage-2024"
LOG_DIR="$BASE_DIR/logs"
mkdir -p "$LOG_DIR"

# Constant data
MODELARRAY=("iid" "pi" "rho" "pirho")
# Array task ids start at 1 (see "-t 1:4"); shift to a 0-based index.
ID=$((SGE_TASK_ID - 1))
# Pick the model for this task, wrapping around the number of models.
MODEL=${MODELARRAY[$((ID % ${#MODELARRAY[@]}))]}
SEED=0

# Finding directory
APPLICATIONS_DIR="$BASE_DIR/code/applications"
echo "$APPLICATIONS_DIR"

# Run the clustering script for the selected model and seed.
Rscript "${APPLICATIONS_DIR}/dore_no_small/01_dore_no_small_clusterize.R" --model "$MODEL" --seed "$SEED"