Changing analysis

This commit is contained in:
Louis Lacoste 2024-04-22 10:26:30 +02:00
parent 1e07a8d79d
commit c3b4144689
3 changed files with 183 additions and 96 deletions

View file

@ -1,96 +0,0 @@
library(ggplot2)
library(tidyverse)
library(latex2exp)
library(patchwork)
#  Loading data
data_folder <- file.path("code", "results", "simulations", "NA_robustness")
data <- readRDS(file.path(
data_folder,
"NA_robustness_19-04-2024_15-18-55_uniform_nested_1-200.Rds"
))
data[["model"]] <- factor(data[["model"]], levels = c("iid", "pi", "rho", "pirho"))
max_repetition <- max(data$repetition)
#  Averaging over repetitions
averaged_data <- data %>%
group_by(prop_NAs, model) %>%
select(-repetition) %>%
summarise_all(list(mean = mean, sd = sd))
#  Preparing auc_data
auc_data <- averaged_data %>%
select(c(prop_NAs, model) | contains("auc_")) %>%
rename_with(~ gsub("auc_", "", .x, fixed = TRUE)) %>%
filter(prop_NAs != 0)
auc_data_long <-
bind_cols(
auc_data %>% select(c("prop_NAs", "model") | contains("_mean")) %>%
pivot_longer(!c(prop_NAs, model),
names_to = c("method"),
values_to = "auc_mean"
),
auc_data %>% select(c("prop_NAs", "model") | contains("_sd")) %>%
pivot_longer(!c(prop_NAs, model),
names_to = NULL,
values_to = "auc_sd"
) %>% ungroup() %>% select(!c("prop_NAs", "model"))
) %>% mutate(method = method %>% gsub(
pattern = "_mean",
replacement = "", fixed = TRUE
))
# Preparing ARI data
ari_data <- averaged_data %>% select(c(prop_NAs, model) | contains("ari"))
ari_data_long <- bind_cols(
ari_data %>% select(c("prop_NAs", "model") | contains("_mean")) %>%
pivot_longer(!c(prop_NAs, model),
names_to = c("method"),
values_to = "ari_mean"
),
ari_data %>% select(c("prop_NAs", "model") | contains("_sd")) %>%
pivot_longer(!c(prop_NAs, model),
names_to = NULL,
values_to = "ari_sd"
) %>% ungroup() %>% select(!c("prop_NAs", "model"))
) %>%
separate(method, into = c("dim", "method", "useless"), sep = "_") %>%
select(-useless)
auc_plot <- ggplot(auc_data_long) +
aes(x = prop_NAs, y = auc_mean) +
geom_line(aes(color = method)) +
geom_point(aes(color = method)) +
geom_ribbon(aes(ymin = auc_mean - auc_sd, ymax = auc_mean + auc_sd, fill = method), alpha = 0.2) +
ylim(c(min(auc_data_long[["auc_mean"]]), max(auc_data_long[["auc_mean"]]))) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) +
ylab(TeX("\\bar{AUC}")) +
xlab("NA proportion") +
labs(fill = "Method", color = "Method") +
ggtitle(TeX(paste(
"$\\bar{AUC}\\pm s_{AUC}$", ", function of NA proportion. N=", max_repetition
))) +
facet_grid(rows = vars(model), as.table = TRUE) +
theme_bw()
ari_plot <- ggplot(ari_data_long) +
aes(x = prop_NAs, y = ari_mean) +
geom_line(aes(color = method)) +
geom_point(aes(color = method)) +
geom_ribbon(aes(ymin = ari_mean - ari_sd, ymax = ari_mean + ari_sd, fill = method), alpha = 0.2) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) +
ylab(TeX("$\\bar{ARI^d}$")) +
xlab("NA proportion") +
labs(fill = "Method", color = "Method") +
ggtitle(TeX(paste(
"$\\bar{ARI^d}\\pm s_{ARI^d}$", ", function of NA proportion. N=", max_repetition
))) +
facet_grid(rows = vars(model), cols = vars(dim)) +
theme_bw()
(auc_plot | ari_plot) + patchwork::plot_layout(guides = "collect")

View file

@ -0,0 +1,183 @@
---
title: "Analysis NA robustness"
output:
bookdown::html_document2:
toc: true
fig_caption: true
---
```{r knitropts, echo = FALSE}
knitr::opts_chunk$set(fig.width=12L, fig.height=10L)
```
```{r imports, echo = FALSE, include=FALSE}
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(latex2exp))
suppressPackageStartupMessages(library(here))
loadNamespace(package = "patchwork")
```
```{r data, echo = FALSE}
#  Loading data
data_folder <- file.path(here(), "code", "results", "simulations", "NA_robustness")
files_list <- list.files(data_folder, pattern = "^NA_robustness_19-04-2024")
```
```{r df_func, echo = FALSE}
prepare_dataframes_auc_ari <- function(data) {
data[["model"]] <- factor(data[["model"]], levels = c("iid", "pi", "rho", "pirho"))
max_repetition <- max(data[["repetition"]])
sampling <- unique(data[["sampling"]])
struct <- unique(data[["struct"]])
#  Averaging over repetitions
averaged_data <- data %>%
group_by(prop_NAs, model) %>%
select(-c(repetition, sampling, struct)) %>%
summarise_all(list(mean = mean, sd = sd))
#  Preparing auc_data
auc_data <- averaged_data %>%
select(c(prop_NAs, model) | contains("auc_")) %>%
rename_with(~ gsub("auc_", "", .x, fixed = TRUE)) %>%
filter(prop_NAs != 0)
auc_data_long <-
bind_cols(
auc_data %>%
select(c("prop_NAs", "model") | contains("_mean")) %>%
pivot_longer(!c(prop_NAs, model),
names_to = c("method"),
values_to = "auc_mean"
),
auc_data %>%
select(c("prop_NAs", "model") | contains("_sd")) %>%
pivot_longer(!c(prop_NAs, model),
names_to = NULL,
values_to = "auc_sd"
) %>%
ungroup() %>%
select(!c("prop_NAs", "model"))
) %>%
mutate(method = method %>%
gsub(
pattern = "_mean",
replacement = "", fixed = TRUE
))
# Preparing ARI data
ari_data <- averaged_data %>% select(c(prop_NAs, model) | contains("ari"))
ari_data_long <- bind_cols(
ari_data %>%
select(c("prop_NAs", "model") | contains("_mean")) %>%
pivot_longer(!c(prop_NAs, model),
names_to = c("method"),
values_to = "ari_mean"
),
ari_data %>%
select(c("prop_NAs", "model") | contains("_sd")) %>%
pivot_longer(!c(prop_NAs, model),
names_to = NULL,
values_to = "ari_sd"
) %>%
ungroup() %>%
select(!c("prop_NAs", "model"))
) %>%
separate(method, into = c("dim", "method", "useless"), sep = "_") %>%
select(-useless)
return(list(
auc_data_long = auc_data_long, ari_data_long = ari_data_long,
auc_data = auc_data, ari_data = ari_data,
max_repetition = max_repetition,
sampling = sampling,
struct = struct
))
}
```
```{r plot-func, echo = FALSE}
plot_auc_ari_data <- function(df_list) {
auc_data_long <- df_list[["auc_data_long"]]
ari_data_long <- df_list[["ari_data_long"]]
max_repetition <- df_list[["max_repetition"]]
sampling <- df_list[["sampling"]]
struct <- df_list[["struct"]]
auc_plot <- ggplot(auc_data_long) +
aes(x = prop_NAs, y = auc_mean) +
geom_line(aes(color = method)) +
geom_point(aes(color = method)) +
geom_ribbon(aes(ymin = auc_mean - auc_sd, ymax = auc_mean + auc_sd, fill = method), alpha = 0.2) +
ylim(c(min(auc_data_long[["auc_mean"]]), max(auc_data_long[["auc_mean"]]))) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) +
ylab(TeX("\\bar{AUC}")) +
xlab("NA proportion") +
labs(fill = "Method", color = "Method") +
ggtitle(TeX(paste(
"$\\bar{AUC}\\pm s_{AUC}$", ", function of NA proportion. N=", max_repetition
))) +
facet_grid(rows = vars(model), as.table = TRUE) +
theme_bw()
ari_plot <- ggplot(ari_data_long) +
aes(x = prop_NAs, y = ari_mean) +
geom_line(aes(color = method)) +
geom_point(aes(color = method)) +
geom_ribbon(aes(ymin = ari_mean - ari_sd, ymax = ari_mean + ari_sd, fill = method), alpha = 0.2) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) +
ylab(TeX("$\\bar{ARI^d}$")) +
xlab("NA proportion") +
labs(fill = "Method", color = "Method") +
ggtitle(TeX(paste(
"$\\bar{ARI^d}\\pm s_{ARI^d}$", ", function of NA proportion. N=", max_repetition
))) +
facet_grid(rows = vars(model), cols = vars(dim)) +
theme_bw()
return((auc_plot | ari_plot) +
patchwork::plot_layout(guides = "collect") +
patchwork::plot_annotation(
title = "Metrics in function of NA proportions",
subtitle = paste0(
"With N = ", max_repetition, ", a ", struct,
" structure and a ", sampling, " sampling"
), tag_levels = "A"
))
}
```
```{r prep-fig-caps, echo = FALSE}
figcaps <- sapply(files_list, function(filename) {
data <- readRDS(file.path(
data_folder,
filename
))
max_repetition <- max(data[["repetition"]])
sampling <- unique(data[["sampling"]])
struct <- unique(data[["struct"]])
return(paste(struct, "structure with", sampling, "sampling and N =", max_repetition, "repetitions"))
})
```
```{r plot, echo = FALSE, fig.cap=paste("Graph of metrics for NA robustness with a ", figcaps)}
for (filename in files_list) {
data <- readRDS(file.path(
data_folder,
filename
))
df_list <- prepare_dataframes_auc_ari(data = data)
plot(plot_auc_ari_data(df_list = df_list))
}
# filename <- files_list[1]
# data <- readRDS(file.path(
# data_folder,
# filename
# ))
# print(here())
# df_list <- prepare_dataframes_auc_ari(data = data)
# knitr::knit_print(plot_auc_ari_data(df_list = df_list))
```