Ignoring cluster output files and adding first results

Removing old data file and improving simulations NA

migale script should work

Removing unnecessary flag

Changing filename
This commit is contained in:
Louis Lacoste 2024-04-19 15:31:45 +02:00 committed by Louis Lacoste
parent b6eca7e5ff
commit 28215028cd
8 changed files with 44 additions and 19 deletions

7
.gitignore vendored
View file

@ -1,2 +1,7 @@
tmp*
last.dump.rds
last.dump.rds
*.o*
*.e*
*.pe*
*.po*

View file

@ -8,9 +8,11 @@ data_folder <- file.path("code", "results", "simulations", "NA_robustness")
data <- readRDS(file.path(
data_folder,
"NA_robustness18-04-2024_17-05-44_1-100.Rds"
"NA_robustness_19-04-2024_15-18-55_uniform_nested_1-200.Rds"
))
data[["model"]] <- factor(data[["model"]], levels = c("iid", "pi", "rho", "pirho"))
max_repetition <- max(data$repetition)
#  Averaging over repetitions
@ -22,7 +24,9 @@ averaged_data <- data %>%
#  Preparing auc_data
auc_data <- averaged_data %>%
select(c(prop_NAs, model) | contains("auc_")) %>%
rename_with(~ gsub("auc_", "", .x, fixed = TRUE))
rename_with(~ gsub("auc_", "", .x, fixed = TRUE)) %>%
filter(prop_NAs != 0)
auc_data_long <-
bind_cols(
auc_data %>% select(c("prop_NAs", "model") | contains("_mean")) %>%
@ -35,7 +39,7 @@ auc_data_long <-
names_to = NULL,
values_to = "auc_sd"
) %>% ungroup() %>% select(!c("prop_NAs", "model"))
) %>% mutate(method = gsub(
) %>% mutate(method = method %>% gsub(
pattern = "_mean",
replacement = "", fixed = TRUE
))
@ -62,14 +66,17 @@ auc_plot <- ggplot(auc_data_long) +
geom_line(aes(color = method)) +
geom_point(aes(color = method)) +
geom_ribbon(aes(ymin = auc_mean - auc_sd, ymax = auc_mean + auc_sd, fill = method), alpha = 0.2) +
ylim(c(0, 1)) +
ylim(c(min(auc_data_long[["auc_mean"]]), max(auc_data_long[["auc_mean"]]))) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) +
ylab(TeX("\\bar{AUC}")) +
xlab("NA proportion") +
labs(fill = "Method", color = "Method") +
ggtitle(TeX(paste(
"$\\bar{AUC}\\pm s_{AUC}$", ", function of NA proportion. N=", max_repetition
))) +
facet_grid(cols = vars(model))
facet_grid(rows = vars(model), as.table = TRUE) +
theme_bw()
ari_plot <- ggplot(ari_data_long) +
aes(x = prop_NAs, y = ari_mean) +
@ -79,9 +86,11 @@ ari_plot <- ggplot(ari_data_long) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) +
ylab(TeX("$\\bar{ARI^d}$")) +
xlab("NA proportion") +
labs(fill = "Method", color = "Method") +
ggtitle(TeX(paste(
"$\\bar{ARI^d}\\pm s_{ARI^d}$", ", function of NA proportion. N=", max_repetition
))) +
facet_grid(rows = vars(model), cols = vars(dim))
facet_grid(rows = vars(model), cols = vars(dim)) +
theme_bw()
auc_plot + ari_plot
(auc_plot | ari_plot) + patchwork::plot_layout(guides = "collect")

View file

@ -1,4 +1,20 @@
#!/usr/bin/env bash
#$ -V
#$ -cwd
#$ -N NA_robustness_array
#$ -m besa
#$ -t 1:8
#$ -q short.q
#$ -pe thread 64
#$ -M louis.lacoste+migale@agroparistech.fr
#$ -o logs/$JOB_NAME.$TASK_ID.out
#$ -e logs/$JOB_NAME.$TASK_ID.err
# Constant data
STRUCTA=("nested" "modular")
SAMPLINGA=("uniform" "row" "col" "rowcol")
# Data
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# Finding simulations directory
@ -6,14 +22,6 @@ SIMULATIONS_DIR=$(echo ${SCRIPT_DIR%/*}/simulations)
echo $SIMULATIONS_DIR
Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct nested --sampling uniform
Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct modular --sampling uniform
# Parsing sge array id
Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct nested --sampling row
Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct modular --sampling row
Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct nested --sampling col
Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct modular --sampling col
Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct nested --sampling rowcol
Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct modular --sampling rowcol
# Map the 1-based SGE array task id onto the (struct, sampling) grid.
# The struct index cycles fastest (task index modulo the number of structures)
# and the sampling index advances once every full cycle of structures (integer
# division). Taking a modulo of the same counter for both indices would only
# reach 4 of the 8 combinations, because 2 divides 4 and the pairs repeat.
TASK_INDEX=$((SGE_TASK_ID - 1))
STRUCT_INDEX=$((TASK_INDEX % ${#STRUCTA[@]}))
SAMPLING_INDEX=$(((TASK_INDEX / ${#STRUCTA[@]}) % ${#SAMPLINGA[@]}))
Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct "${STRUCTA[$STRUCT_INDEX]}" --sampling "${SAMPLINGA[$SAMPLING_INDEX]}"

View file

@ -255,7 +255,10 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {
Z[[1]][[2]],
mybisbmpop[["best_fit"]][["Z"]][[1]][[2]]
),
elapsed_secs = difftime(stop_time, start_time, units = "sec")
elapsed_secs = difftime(stop_time, start_time, units = "sec"),
sampling = sampling,
struct = struct
)
message("Finished step ", current, "/", nrow(conditions), "\n")