diff --git a/code/analysis/analyze_NA_robustness.R b/code/analysis/analyze_NA_robustness.R deleted file mode 100644 index f01e7fb..0000000 --- a/code/analysis/analyze_NA_robustness.R +++ /dev/null @@ -1,96 +0,0 @@ -library(ggplot2) -library(tidyverse) -library(latex2exp) -library(patchwork) - -#  Loading data -data_folder <- file.path("code", "results", "simulations", "NA_robustness") - -data <- readRDS(file.path( - data_folder, - "NA_robustness_19-04-2024_15-18-55_uniform_nested_1-200.Rds" -)) - -data[["model"]] <- factor(data[["model"]], levels = c("iid", "pi", "rho", "pirho")) - -max_repetition <- max(data$repetition) - -#  Averaging over repetitions -averaged_data <- data %>% - group_by(prop_NAs, model) %>% - select(-repetition) %>% - summarise_all(list(mean = mean, sd = sd)) - -#  Preparing auc_data -auc_data <- averaged_data %>% - select(c(prop_NAs, model) | contains("auc_")) %>% - rename_with(~ gsub("auc_", "", .x, fixed = TRUE)) %>% - filter(prop_NAs != 0) - -auc_data_long <- - bind_cols( - auc_data %>% select(c("prop_NAs", "model") | contains("_mean")) %>% - pivot_longer(!c(prop_NAs, model), - names_to = c("method"), - values_to = "auc_mean" - ), - auc_data %>% select(c("prop_NAs", "model") | contains("_sd")) %>% - pivot_longer(!c(prop_NAs, model), - names_to = NULL, - values_to = "auc_sd" - ) %>% ungroup() %>% select(!c("prop_NAs", "model")) - ) %>% mutate(method = method %>% gsub( - pattern = "_mean", - replacement = "", fixed = TRUE - )) -# Preparing ARI data -ari_data <- averaged_data %>% select(c(prop_NAs, model) | contains("ari")) - -ari_data_long <- bind_cols( - ari_data %>% select(c("prop_NAs", "model") | contains("_mean")) %>% - pivot_longer(!c(prop_NAs, model), - names_to = c("method"), - values_to = "ari_mean" - ), - ari_data %>% select(c("prop_NAs", "model") | contains("_sd")) %>% - pivot_longer(!c(prop_NAs, model), - names_to = NULL, - values_to = "ari_sd" - ) %>% ungroup() %>% select(!c("prop_NAs", "model")) -) %>% - separate(method, into = c("dim", "method", "useless"), sep = "_") %>% - select(-useless) - -auc_plot <- ggplot(auc_data_long) + - aes(x = prop_NAs, y = auc_mean) + - geom_line(aes(color = method)) + - geom_point(aes(color = method)) + - geom_ribbon(aes(ymin = auc_mean - auc_sd, ymax = auc_mean + auc_sd, fill = method), alpha = 0.2) + - ylim(c(min(auc_data_long[["auc_mean"]]), max(auc_data_long[["auc_mean"]]))) + - scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) + - ylab(TeX("\\bar{AUC}")) + - xlab("NA proportion") + - labs(fill = "Method", color = "Method") + - ggtitle(TeX(paste( - "$\\bar{AUC}\\pm s_{AUC}$", ", function of NA proportion. N=", max_repetition - ))) + - facet_grid(rows = vars(model), as.table = TRUE) + - theme_bw() - - -ari_plot <- ggplot(ari_data_long) + - aes(x = prop_NAs, y = ari_mean) + - geom_line(aes(color = method)) + - geom_point(aes(color = method)) + - geom_ribbon(aes(ymin = ari_mean - ari_sd, ymax = ari_mean + ari_sd, fill = method), alpha = 0.2) + - scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) + - ylab(TeX("$\\bar{ARI^d}$")) + - xlab("NA proportion") + - labs(fill = "Method", color = "Method") + - ggtitle(TeX(paste( - "$\\bar{ARI^d}\\pm s_{ARI^d}$", ", function of NA proportion. N=", max_repetition - ))) + - facet_grid(rows = vars(model), cols = vars(dim)) + - theme_bw() - -(auc_plot | ari_plot) + patchwork::plot_layout(guides = "collect") diff --git a/code/analysis/analyze_NA_robustness.Rmd b/code/analysis/analyze_NA_robustness.Rmd new file mode 100644 index 0000000..cc40a60 --- /dev/null +++ b/code/analysis/analyze_NA_robustness.Rmd @@ -0,0 +1,183 @@ +--- +title: "Analysis NA robustness" +output: + bookdown::html_document2: + toc: true + fig_caption: true +--- + +```{r knitropts, echo = FALSE} +knitr::opts_chunk$set(fig.width=12L, fig.height=10L) +``` + +```{r imports, echo = FALSE, include=FALSE} +suppressPackageStartupMessages(library(ggplot2)) +suppressPackageStartupMessages(library(tidyverse)) +suppressPackageStartupMessages(library(latex2exp)) +suppressPackageStartupMessages(library(here)) +loadNamespace(package = "patchwork") +``` + +```{r data, echo = FALSE} +#  Loading data +data_folder <- file.path(here(), "code", "results", "simulations", "NA_robustness") +files_list <- list.files(data_folder, pattern = "^NA_robustness_19-04-2024") +``` +```{r df_func, echo = FALSE} +prepare_dataframes_auc_ari <- function(data) { + data[["model"]] <- factor(data[["model"]], levels = c("iid", "pi", "rho", "pirho")) + + max_repetition <- max(data[["repetition"]]) + sampling <- unique(data[["sampling"]]) + struct <- unique(data[["struct"]]) + + #  Averaging over repetitions + averaged_data <- data %>% + group_by(prop_NAs, model) %>% + select(-c(repetition, sampling, struct)) %>% + summarise_all(list(mean = mean, sd = sd)) + + #  Preparing auc_data + auc_data <- averaged_data %>% + select(c(prop_NAs, model) | contains("auc_")) %>% + rename_with(~ gsub("auc_", "", .x, fixed = TRUE)) %>% + filter(prop_NAs != 0) + + auc_data_long <- + bind_cols( + auc_data %>% + select(c("prop_NAs", "model") | contains("_mean")) %>% + pivot_longer(!c(prop_NAs, model), + names_to = c("method"), + values_to = "auc_mean" + ), + auc_data %>% + select(c("prop_NAs", "model") | contains("_sd")) %>% + pivot_longer(!c(prop_NAs, model), + names_to = NULL, + values_to = "auc_sd" + ) %>% + ungroup() %>% + select(!c("prop_NAs", "model")) + ) %>% + mutate(method = method %>% + gsub( + pattern = "_mean", + replacement = "", fixed = TRUE + )) + # Preparing ARI data + ari_data <- averaged_data %>% select(c(prop_NAs, model) | contains("ari")) + + ari_data_long <- bind_cols( + ari_data %>% + select(c("prop_NAs", "model") | contains("_mean")) %>% + pivot_longer(!c(prop_NAs, model), + names_to = c("method"), + values_to = "ari_mean" + ), + ari_data %>% + select(c("prop_NAs", "model") | contains("_sd")) %>% + pivot_longer(!c(prop_NAs, model), + names_to = NULL, + values_to = "ari_sd" + ) %>% + ungroup() %>% + select(!c("prop_NAs", "model")) + ) %>% + separate(method, into = c("dim", "method", "useless"), sep = "_") %>% + select(-useless) + + return(list( + auc_data_long = auc_data_long, ari_data_long = ari_data_long, + auc_data = auc_data, ari_data = ari_data, + max_repetition = max_repetition, + sampling = sampling, + struct = struct + )) +} +``` +```{r plot-func, echo = FALSE} +plot_auc_ari_data <- function(df_list) { + auc_data_long <- df_list[["auc_data_long"]] + ari_data_long <- df_list[["ari_data_long"]] + max_repetition <- df_list[["max_repetition"]] + sampling <- df_list[["sampling"]] + struct <- df_list[["struct"]] + + auc_plot <- ggplot(auc_data_long) + + aes(x = prop_NAs, y = auc_mean) + + geom_line(aes(color = method)) + + geom_point(aes(color = method)) + + geom_ribbon(aes(ymin = auc_mean - auc_sd, ymax = auc_mean + auc_sd, fill = method), alpha = 0.2) + + ylim(c(min(auc_data_long[["auc_mean"]]), max(auc_data_long[["auc_mean"]]))) + + scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) + + ylab(TeX("\\bar{AUC}")) + + xlab("NA proportion") + + labs(fill = "Method", color = "Method") + + ggtitle(TeX(paste( + "$\\bar{AUC}\\pm s_{AUC}$", ", function of NA proportion. N=", max_repetition + ))) + + facet_grid(rows = vars(model), as.table = TRUE) + + theme_bw() + + + ari_plot <- ggplot(ari_data_long) + + aes(x = prop_NAs, y = ari_mean) + + geom_line(aes(color = method)) + + geom_point(aes(color = method)) + + geom_ribbon(aes(ymin = ari_mean - ari_sd, ymax = ari_mean + ari_sd, fill = method), alpha = 0.2) + + scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) + + ylab(TeX("$\\bar{ARI^d}$")) + + xlab("NA proportion") + + labs(fill = "Method", color = "Method") + + ggtitle(TeX(paste( + "$\\bar{ARI^d}\\pm s_{ARI^d}$", ", function of NA proportion. N=", max_repetition + ))) + + facet_grid(rows = vars(model), cols = vars(dim)) + + theme_bw() + + return((auc_plot | ari_plot) + + patchwork::plot_layout(guides = "collect") + + patchwork::plot_annotation( + title = "Metrics in function of NA proportions", + subtitle = paste0( + "With N = ", max_repetition, ", a ", struct, + " structure and a ", sampling, " sampling" + ), tag_levels = "A" + )) +} +``` + +```{r prep-fig-caps, echo = FALSE} +figcaps <- sapply(files_list, function(filename) { + data <- readRDS(file.path( + data_folder, + filename + )) + + max_repetition <- max(data[["repetition"]]) + sampling <- unique(data[["sampling"]]) + struct <- unique(data[["struct"]]) + + return(paste(struct, "structure with", sampling, "sampling and N =", max_repetition, "repetitions")) +}) +``` + +```{r plot, echo = FALSE, fig.cap=paste("Graph of metrics for NA robustness with a ", figcaps)} +for (filename in files_list) { + data <- readRDS(file.path( + data_folder, + filename + )) + df_list <- prepare_dataframes_auc_ari(data = data) + plot(plot_auc_ari_data(df_list = df_list)) +} +# filename <- files_list[1] +# data <- readRDS(file.path( +# data_folder, +# filename +# )) +# print(here()) +# df_list <- prepare_dataframes_auc_ari(data = data) +# knitr::knit_print(plot_auc_ari_data(df_list = df_list)) +``` \ No newline at end of file diff --git a/code/results/simulations/NA_robustness/NA_robustness_19-04-2024_15-18-55_uniform_nested_1-200.Rds b/code/results/simulations/NA_robustness/NA_robustness_19-04-2024_15-18-55_uniform_nested_1-200.Rds deleted file mode 100644 index 77041c7..0000000 Binary files a/code/results/simulations/NA_robustness/NA_robustness_19-04-2024_15-18-55_uniform_nested_1-200.Rds and /dev/null differ