application : sous collection pour la présentation
This commit is contained in:
parent
54228b24f9
commit
aa0ffefcdb
2 changed files with 181 additions and 0 deletions
147
code/applications/sub-dore/sous-collection/01_souscol.R
Normal file
147
code/applications/sub-dore/sous-collection/01_souscol.R
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
library(colSBM)
|
||||
library(dplyr)
|
||||
library(tidyr)
|
||||
library(stringr)
|
||||
library(here)
|
||||
library(future)
|
||||
library(future.apply)
|
||||
|
||||
plan(multicore)
|
||||
# Arguments
|
||||
arg <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
model <- "iid"
|
||||
seed <- 1234L
|
||||
|
||||
if (length(arg) == 0L) {
|
||||
message("No arguments provided, using default.")
|
||||
} else {
|
||||
if ("--model" %in% arg) {
|
||||
model <- arg[(which(arg == "--model") + 1L)]
|
||||
} else {
|
||||
message("No model provided, defaulting to iid.")
|
||||
}
|
||||
if ("--seed" %in% arg) {
|
||||
seed <- try(as.integer(arg[(which(arg == "--seed") + 1L)]))
|
||||
} else {
|
||||
message("No seed provided, defaulting to 1234.")
|
||||
}
|
||||
}
|
||||
|
||||
# Arguments checks
|
||||
allowed_model <- c("iid", "pi", "rho", "pirho")
|
||||
stopifnot(
|
||||
"Unknown model, should be : iid, pi, rho or pirho" = (model %in% allowed_model),
|
||||
"Seed isn't castable to integer" = (is.integer(seed))
|
||||
)
|
||||
|
||||
message(
|
||||
"Début du clustering des données sous-collection avec le modèle ", model,
|
||||
" et la seed ", seed
|
||||
)
|
||||
|
||||
set.seed(seed, "L'Ecuyer-CMRG")
|
||||
base_data_folder <- file.path(here(), "code", "data", "dore")
|
||||
save_folder <- file.path(
|
||||
here(), "code", "results", "applications",
|
||||
"sub-dore", "souscollection"
|
||||
)
|
||||
|
||||
if (!dir.exists(save_folder)) {
|
||||
dir.create(save_folder, recursive = TRUE)
|
||||
}
|
||||
|
||||
collection_data <- file.path(
|
||||
base_data_folder,
|
||||
"dore-matrices.Rds"
|
||||
)
|
||||
|
||||
interaction_data <- read.table(file = file.path(base_data_folder, "interaction-data.txt"), sep = "\t", header = TRUE)
|
||||
|
||||
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
|
||||
names_aggreg_networks <- sapply(
|
||||
seq_ids_network_aggreg,
|
||||
function(id) {
|
||||
paste0(
|
||||
unique(interaction_data[which(interaction_data$id_network_aggreg == id), ]$web),
|
||||
collapse = "+"
|
||||
)
|
||||
}
|
||||
)
|
||||
# Computation of incidence matrices
|
||||
incidence_matrices <- lapply(
|
||||
seq_ids_network_aggreg,
|
||||
function(m) {
|
||||
current_interaction_data <- interaction_data[which(interaction_data$id_network_aggreg == m), ] %>%
|
||||
mutate(
|
||||
plantaggreg = paste(plantorder,
|
||||
plantfamily, plantgenus, plantspecies,
|
||||
sep = "-"
|
||||
),
|
||||
insectaggreg = paste(insectorder,
|
||||
insectfamily, insectgenus, insectspecies,
|
||||
sep = "-"
|
||||
)
|
||||
)
|
||||
current_interaction_data <- table(current_interaction_data$plantaggreg, current_interaction_data$insectaggreg)
|
||||
|
||||
current_incidence_matrix <- matrix(current_interaction_data,
|
||||
ncol = ncol(current_interaction_data), dimnames = dimnames(current_interaction_data)
|
||||
)
|
||||
|
||||
current_incidence_matrix[which(current_incidence_matrix > 0)] <- 1
|
||||
return(current_incidence_matrix)
|
||||
}
|
||||
)
|
||||
|
||||
names(incidence_matrices) <- names_aggreg_networks
|
||||
|
||||
cluster1 <- c(
|
||||
"arroyo1982_1+arroyo1982_2+arroyo3",
|
||||
"Struck1994",
|
||||
"Albrecht2010_49yr+Albrecht2010_63yr+Albrecht2010_84yr+Albrecht2010_109yr+Albrecht2010_130yr",
|
||||
"Chamberlain_HLU+Chamberlain_HLG+Chamberlain_OKU+Chamberlain_OKG+Chamberlain_WLU+Chamberlain_WLG+Chamberlain_SOU+Chamberlain_SOG",
|
||||
"Devoto2005_PP+Devoto2005_AP",
|
||||
"Devoto2005_LL+Devoto2005_CT",
|
||||
"KatoMiura1996",
|
||||
"Souza_chaco",
|
||||
"Souza_vereda",
|
||||
"Adedoja2019",
|
||||
"Hackett2019_NZ_salt_marsh+Hackett2019_NZ_sand_dune+Hackett2019_NZ_scrub_coprosma"
|
||||
)
|
||||
|
||||
cluster2 <- c(
|
||||
"medan2002ld",
|
||||
"ramirez1992",
|
||||
"Benadi2013_3(1340m)",
|
||||
"CordenizPicanco2018_NatVeg"
|
||||
)
|
||||
|
||||
incidence_matrices <- incidence_matrices[names_aggreg_networks %in% c(cluster1, cluster2)]
|
||||
|
||||
start_time <- format(Sys.time(), "%d-%m-%y_%H-%M-%S")
|
||||
|
||||
list_collection <- clusterize_bipartite_networks(
|
||||
netlist = incidence_matrices,
|
||||
net_id = names(incidence_matrices),
|
||||
colsbm_model = model,
|
||||
nb_run = 3L,
|
||||
global_opts = list(
|
||||
nb_cores = parallelly::availableCores(omit = 1L),
|
||||
verbosity = 3L,
|
||||
plot_details = 0L,
|
||||
backend = "future"
|
||||
),
|
||||
fit_opts = list(max_vem_steps = 200L)
|
||||
)
|
||||
|
||||
save_file <- file.path(
|
||||
save_folder, paste0(
|
||||
"souscollection_collection_",
|
||||
model, "_seed_", seed,
|
||||
"_", start_time, ".Rds"
|
||||
)
|
||||
)
|
||||
|
||||
message("Clustering saved. Model", model)
|
||||
saveRDS(list_collection, file = save_file)
|
||||
34
code/scripts/migale_application_souscollection.sh
Executable file
34
code/scripts/migale_application_souscollection.sh
Executable file
|
|
@ -0,0 +1,34 @@
|
|||
#!/usr/bin/env bash
|
||||
#$ -V
|
||||
#$ -cwd
|
||||
#$ -N baldock
|
||||
#$ -m besa
|
||||
#$ -t 1:16
|
||||
#$ -q short.q
|
||||
#$ -pe thread 64
|
||||
#$ -M louis.lacoste+migale@agroparistech.fr
|
||||
#$ -o logs/$JOB_NAME.$TASK_ID
|
||||
#$ -e logs/$JOB_NAME.$TASK_ID
|
||||
|
||||
# Creating log directory if it doesn't exists
|
||||
BASE_DIR="/home/$USER/work/mia-stage-2024"
|
||||
LOG_DIR=$(echo "$BASE_DIR/logs")
|
||||
|
||||
if [ ! -d "$LOG_DIR" ]; then
|
||||
mkdir -p $LOG_DIR
|
||||
fi
|
||||
|
||||
# Constant data
|
||||
MODELARRAY=("iid" "pi" "rho" "pirho")
|
||||
ID=$((SGE_TASK_ID - 1))
|
||||
MODEL=${MODELARRAY[$((ID % 4))]}
|
||||
|
||||
SEED=$(($((ID + $((ID / 4)))) % 4))
|
||||
|
||||
|
||||
# Finding directory
|
||||
APPLICATIONS_DIR=$(echo "$BASE_DIR/code/applications")
|
||||
|
||||
echo $APPLICATIONS_DIR
|
||||
|
||||
Rscript "${APPLICATIONS_DIR}/sub-dore/sous-collection/01_souscol.R" --model $MODEL --seed $SEED
|
||||
Loading…
Add table
Reference in a new issue