Patch notes. Free text placed before the first "diff --git" header is skipped
by `git apply` and `patch`, so this preamble does not affect the patch.

What the patch does:
  .gitignore                 ignore *.o*, *.e*, *.pe*, *.po* (presumably SGE
                             job output files; confirm the patterns are not
                             too broad for this repository).
  analyze_NA_robustness.R    read the 19-04-2024 uniform/nested result file,
                             fix the order of the `model` factor, drop
                             prop_NAs == 0 from the AUC data, pass `method`
                             to gsub(), restyle and combine both plots.
  results/.../*.Rds          three old result files deleted, one added
                             (binary, contents not part of this text).
  migale_..._robustness.sh   run the simulations as an 8-task SGE array job.
  simulations_NA_robustness.R  store `sampling` and `struct` in each result.

Review fixes folded into this version:
  (1) .sh: the sampling index was (SGE_TASK_ID - 1) % 4 while the struct
      index was (SGE_TASK_ID - 1) % 2, so tasks 5-8 repeated tasks 1-4 and
      only 4 of the 8 (struct, sampling) pairs were run. The sampling index
      now advances once per full cycle of STRUCTA, which reproduces the order
      of the removed sequential Rscript calls.
  (2) .sh: the array range uses the documented n-m form (1-8).
  (3) analysis: ylim() on the AUC plot replaced by coord_cartesian(ylim = ...)
      with the same window, so ribbon bounds outside it are clipped instead
      of being censored to NA.

NOTE(review): line breaks and indentation of context lines were reconstructed
from a whitespace-collapsed copy, and the post-image blob ids on the "index"
lines of the two edited text files are stale. Regenerate the patch with
`git diff` from a working tree before applying it.

diff --git a/.gitignore b/.gitignore
index e5b9d27..9e6aee1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,7 @@
 tmp*
-last.dump.rds
\ No newline at end of file
+last.dump.rds
+
+*.o*
+*.e*
+*.pe*
+*.po*
diff --git a/code/analysis/analyze_NA_robustness.R b/code/analysis/analyze_NA_robustness.R
index cead6c3..f01e7fb 100644
--- a/code/analysis/analyze_NA_robustness.R
+++ b/code/analysis/analyze_NA_robustness.R
@@ -8,9 +8,11 @@ data_folder <- file.path("code", "results", "simulations", "NA_robustness")
 
 data <- readRDS(file.path(
   data_folder,
-  "NA_robustness18-04-2024_17-05-44_1-100.Rds"
+  "NA_robustness_19-04-2024_15-18-55_uniform_nested_1-200.Rds"
 ))
 
+data[["model"]] <- factor(data[["model"]], levels = c("iid", "pi", "rho", "pirho"))
+
 max_repetition <- max(data$repetition)
 
 #  Averaging over repetitions
@@ -22,7 +24,9 @@ averaged_data <- data %>%
 #  Preparing auc_data
 auc_data <- averaged_data %>%
   select(c(prop_NAs, model) | contains("auc_")) %>%
-  rename_with(~ gsub("auc_", "", .x, fixed = TRUE))
+  rename_with(~ gsub("auc_", "", .x, fixed = TRUE)) %>%
+  filter(prop_NAs != 0)
+
 
 auc_data_long <-
   bind_cols(
@@ -35,7 +39,7 @@ auc_data_long <-
       names_to = NULL, values_to = "auc_sd"
     ) %>%
     ungroup() %>% select(!c("prop_NAs", "model"))
-  ) %>% mutate(method = gsub(
+  ) %>% mutate(method = method %>% gsub(
     pattern = "_mean", replacement = "",
     fixed = TRUE
   ))
@@ -62,14 +66,19 @@ auc_plot <- ggplot(auc_data_long) +
   geom_line(aes(color = method)) +
   geom_point(aes(color = method)) +
   geom_ribbon(aes(ymin = auc_mean - auc_sd, ymax = auc_mean + auc_sd, fill = method), alpha = 0.2) +
-  ylim(c(0, 1)) +
+  # Zoom with coord_cartesian(): ylim() sets scale limits, which would turn
+  # ribbon bounds outside the window into NA and break the +/- sd band.
+  coord_cartesian(ylim = range(auc_data_long[["auc_mean"]])) +
   scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) +
   ylab(TeX("\\bar{AUC}")) +
   xlab("NA proportion") +
+  labs(fill = "Method", color = "Method") +
   ggtitle(TeX(paste(
     "$\\bar{AUC}\\pm s_{AUC}$", ", function of NA proportion. N=", max_repetition
   ))) +
-  facet_grid(cols = vars(model))
+  facet_grid(rows = vars(model), as.table = TRUE) +
+  theme_bw()
+
 
 ari_plot <- ggplot(ari_data_long) +
   aes(x = prop_NAs, y = ari_mean) +
@@ -79,9 +88,11 @@ ari_plot <- ggplot(ari_data_long) +
   scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) +
   ylab(TeX("$\\bar{ARI^d}$")) +
   xlab("NA proportion") +
+  labs(fill = "Method", color = "Method") +
   ggtitle(TeX(paste(
     "$\\bar{ARI^d}\\pm s_{ARI^d}$", ", function of NA proportion. N=", max_repetition
   ))) +
-  facet_grid(rows = vars(model), cols = vars(dim))
+  facet_grid(rows = vars(model), cols = vars(dim)) +
+  theme_bw()
 
-auc_plot + ari_plot
\ No newline at end of file
+(auc_plot | ari_plot) + patchwork::plot_layout(guides = "collect")
diff --git a/code/results/simulations/NA_robustness/NA_robustness18-04-2024_16-59-42_1-100.Rds b/code/results/simulations/NA_robustness/NA_robustness18-04-2024_16-59-42_1-100.Rds
deleted file mode 100644
index 134ff96..0000000
Binary files a/code/results/simulations/NA_robustness/NA_robustness18-04-2024_16-59-42_1-100.Rds and /dev/null differ
diff --git a/code/results/simulations/NA_robustness/NA_robustness18-04-2024_17-05-44_1-100.Rds b/code/results/simulations/NA_robustness/NA_robustness18-04-2024_17-05-44_1-100.Rds
deleted file mode 100644
index 622a1c8..0000000
Binary files a/code/results/simulations/NA_robustness/NA_robustness18-04-2024_17-05-44_1-100.Rds and /dev/null differ
diff --git a/code/results/simulations/NA_robustness/NA_robustness_17-04-2024_18-21-25_1-400.Rds b/code/results/simulations/NA_robustness/NA_robustness_17-04-2024_18-21-25_1-400.Rds
deleted file mode 100644
index fea0425..0000000
Binary files a/code/results/simulations/NA_robustness/NA_robustness_17-04-2024_18-21-25_1-400.Rds and /dev/null differ
diff --git a/code/results/simulations/NA_robustness/NA_robustness_19-04-2024_15-18-55_uniform_nested_1-200.Rds b/code/results/simulations/NA_robustness/NA_robustness_19-04-2024_15-18-55_uniform_nested_1-200.Rds
new file mode 100644
index 0000000..77041c7
Binary files /dev/null and b/code/results/simulations/NA_robustness/NA_robustness_19-04-2024_15-18-55_uniform_nested_1-200.Rds differ
diff --git a/code/scripts/migale_simulations_NA_robustness.sh b/code/scripts/migale_simulations_NA_robustness.sh
index 1113d3d..868cded 100755
--- a/code/scripts/migale_simulations_NA_robustness.sh
+++ b/code/scripts/migale_simulations_NA_robustness.sh
@@ -1,4 +1,20 @@
 #!/usr/bin/env bash
+#$ -V
+#$ -cwd
+#$ -N NA_robustness_array
+#$ -m besa
+#$ -t 1-8
+#$ -q short.q
+#$ -pe thread 64
+#$ -M louis.lacoste+migale@agroparistech.fr
+#$ -o logs/$JOB_NAME.$TASK_ID.out
+#$ -e logs/$JOB_NAME.$TASK_ID.err
+
+# Constant data
+STRUCTA=("nested" "modular")
+SAMPLINGA=("uniform" "row" "col" "rowcol")
+
+# Data
 SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 
 # Finding simulations directory
@@ -6,14 +22,12 @@ SIMULATIONS_DIR=$(echo ${SCRIPT_DIR%/*}/simulations)
 
 echo $SIMULATIONS_DIR
 
-Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct nested --sampling uniform
-Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct modular --sampling uniform
+# Parsing sge array id
 
-Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct nested --sampling row
-Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct modular --sampling row
-
-Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct nested --sampling col
-Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct modular --sampling col
-
-Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct nested --sampling rowcol
-Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct modular --sampling rowcol
\ No newline at end of file
+# The task ids enumerate the (struct, sampling) grid: struct changes on every
+# task, sampling only once per full cycle of STRUCTA, so no pair is repeated.
+TASK_INDEX=$((SGE_TASK_ID - 1))
+STRUCT="${STRUCTA[$((TASK_INDEX % ${#STRUCTA[@]}))]}"
+SAMPLING="${SAMPLINGA[$((TASK_INDEX / ${#STRUCTA[@]} % ${#SAMPLINGA[@]}))]}"
+
+Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct "${STRUCT}" --sampling "${SAMPLING}"
diff --git a/code/simulations/simulations_NA_robustness.R b/code/simulations/simulations_NA_robustness.R
index 10ec067..f1a7638 100644
--- a/code/simulations/simulations_NA_robustness.R
+++ b/code/simulations/simulations_NA_robustness.R
@@ -255,7 +255,10 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {
       Z[[1]][[2]],
       mybisbmpop[["best_fit"]][["Z"]][[1]][[2]]
     ),
-    elapsed_secs = difftime(stop_time, start_time, units = "sec")
+    elapsed_secs = difftime(stop_time, start_time, units = "sec"),
+
+    sampling = sampling,
+    struct = struct
   )
 
   message("Finished step ", current, "/", nrow(conditions), "\n")