Ignoring cluster output files and adding first results

Removing old data file and improving simulations. NA migale script should work. Removing unnecessary flag. Changing filename.
2024-04-19 15:31:45 +02:00 · 2024-04-19 15:31:45 +02:00 · 28215028cd
commit 28215028cd
parent b6eca7e5ff
8 changed files with 44 additions and 19 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,7 @@
 tmp*
-last.dump.rds
+last.dump.rds
+
+*.o*
+*.e*
+*.pe*
+*.po*
--- a/code/analysis/analyze_NA_robustness.R
+++ b/code/analysis/analyze_NA_robustness.R
@ -8,9 +8,11 @@ data_folder <- file.path("code", "results", "simulations", "NA_robustness")

 data <- readRDS(file.path(
    data_folder,
-    "NA_robustness18-04-2024_17-05-44_1-100.Rds"
+    "NA_robustness_19-04-2024_15-18-55_uniform_nested_1-200.Rds"
 ))

+data[["model"]] <- factor(data[["model"]], levels = c("iid", "pi", "rho", "pirho"))
+
 max_repetition <- max(data$repetition)

 #  Averaging over repetitions
@ -22,7 +24,9 @@ averaged_data <- data %>%
 #  Preparing auc_data
 auc_data <- averaged_data %>%
    select(c(prop_NAs, model) | contains("auc_")) %>%
-    rename_with(~ gsub("auc_", "", .x, fixed = TRUE))
+    rename_with(~ gsub("auc_", "", .x, fixed = TRUE)) %>%
+    filter(prop_NAs != 0)
+
 auc_data_long <-
    bind_cols(
        auc_data %>% select(c("prop_NAs", "model") | contains("_mean")) %>%
@ -35,7 +39,7 @@ auc_data_long <-
                names_to = NULL,
                values_to = "auc_sd"
            ) %>% ungroup() %>% select(!c("prop_NAs", "model"))
-    ) %>% mutate(method = gsub(
+    ) %>% mutate(method = method %>% gsub(
        pattern = "_mean",
        replacement = "", fixed = TRUE
    ))
@ -62,14 +66,17 @@ auc_plot <- ggplot(auc_data_long) +
    geom_line(aes(color = method)) +
    geom_point(aes(color = method)) +
    geom_ribbon(aes(ymin = auc_mean - auc_sd, ymax = auc_mean + auc_sd, fill = method), alpha = 0.2) +
-    ylim(c(0, 1)) +
+    # Limits span mean +/- sd: ylim() turns out-of-range values into NA, so
+    # limits based on the mean alone would cut holes in the geom_ribbon band.
+    ylim(range(
+        auc_data_long[["auc_mean"]] - auc_data_long[["auc_sd"]],
+        auc_data_long[["auc_mean"]] + auc_data_long[["auc_sd"]]
+    )) +
    scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) +
    ylab(TeX("\\bar{AUC}")) +
    xlab("NA proportion") +
+    labs(fill = "Method", color = "Method") +
    ggtitle(TeX(paste(
        "$\\bar{AUC}\\pm s_{AUC}$", ", function of NA proportion. N=", max_repetition
    ))) +
-    facet_grid(cols = vars(model))
+    facet_grid(rows = vars(model), as.table = TRUE) +
+    theme_bw()
+

 ari_plot <- ggplot(ari_data_long) +
    aes(x = prop_NAs, y = ari_mean) +
@ -79,9 +86,11 @@ ari_plot <- ggplot(ari_data_long) +
    scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) +
    ylab(TeX("$\\bar{ARI^d}$")) +
    xlab("NA proportion") +
+    labs(fill = "Method", color = "Method") +
    ggtitle(TeX(paste(
        "$\\bar{ARI^d}\\pm s_{ARI^d}$", ", function of NA proportion. N=", max_repetition
    ))) +
-    facet_grid(rows = vars(model), cols = vars(dim))
+    facet_grid(rows = vars(model), cols = vars(dim)) +
+    theme_bw()

-auc_plot + ari_plot
+(auc_plot | ari_plot) + patchwork::plot_layout(guides = "collect")
--- a/code/results/simulations/NA_robustness/NA_robustness18-04-2024_16-59-42_1-100.Rds
+++ b/code/results/simulations/NA_robustness/NA_robustness18-04-2024_16-59-42_1-100.Rds
--- a/code/results/simulations/NA_robustness/NA_robustness18-04-2024_17-05-44_1-100.Rds
+++ b/code/results/simulations/NA_robustness/NA_robustness18-04-2024_17-05-44_1-100.Rds
--- a/code/results/simulations/NA_robustness/NA_robustness_17-04-2024_18-21-25_1-400.Rds
+++ b/code/results/simulations/NA_robustness/NA_robustness_17-04-2024_18-21-25_1-400.Rds
--- a/code/results/simulations/NA_robustness/NA_robustness_19-04-2024_15-18-55_uniform_nested_1-200.Rds
+++ b/code/results/simulations/NA_robustness/NA_robustness_19-04-2024_15-18-55_uniform_nested_1-200.Rds
--- a/code/scripts/migale_simulations_NA_robustness.sh
+++ b/code/scripts/migale_simulations_NA_robustness.sh
@ -1,4 +1,20 @@
 #!/usr/bin/env bash
+#$ -V
+#$ -cwd
+#$ -N NA_robustness_array
+#$ -m besa
+#$ -t 1:8
+#$ -q short.q
+#$ -pe thread 64
+#$ -M louis.lacoste+migale@agroparistech.fr
+#$ -o logs/$JOB_NAME.$TASK_ID.out
+#$ -e logs/$JOB_NAME.$TASK_ID.err
+
+# Constant data
+STRUCTA=("nested" "modular")
+SAMPLINGA=("uniform" "row" "col" "rowcol")
+
+# Data
 SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

 # Finding simulations directory
@ -6,14 +22,6 @@ SIMULATIONS_DIR=$(echo ${SCRIPT_DIR%/*}/simulations)

 echo $SIMULATIONS_DIR

-Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct nested --sampling uniform
-Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct modular --sampling uniform
+# Parsing sge array id

-Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct nested --sampling row
-Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct modular --sampling row
-
-Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct nested --sampling col
-Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct modular --sampling col
-
-Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct nested --sampling rowcol
-Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct modular --sampling rowcol
+# SGE_TASK_ID runs over 1..8; decode it into a unique (struct, sampling) pair:
+# the structure alternates with every task, the sampling advances every 2 tasks.
+# (Indexing both arrays with a plain modulo repeats 4 pairs twice and skips 4.)
+TASK_INDEX=$((SGE_TASK_ID - 1))
+Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct "${STRUCTA[$((TASK_INDEX % 2))]}" --sampling "${SAMPLINGA[$((TASK_INDEX / 2 % 4))]}"
--- a/code/simulations/simulations_NA_robustness.R
+++ b/code/simulations/simulations_NA_robustness.R
@ -255,7 +255,10 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {
            Z[[1]][[2]],
            mybisbmpop[["best_fit"]][["Z"]][[1]][[2]]
        ),
-        elapsed_secs = difftime(stop_time, start_time, units = "sec")
+        elapsed_secs = difftime(stop_time, start_time, units = "sec"),
+
+        sampling = sampling,
+        struct = struct
    )

    message("Finished step ", current, "/", nrow(conditions), "\n")