Changing analysis
This commit is contained in:
parent
1e07a8d79d
commit
c3b4144689
3 changed files with 183 additions and 96 deletions
|
|
@ -1,96 +0,0 @@
|
|||
library(ggplot2)
|
||||
library(tidyverse)
|
||||
library(latex2exp)
|
||||
library(patchwork)
|
||||
|
||||
# Loading data
|
||||
data_folder <- file.path("code", "results", "simulations", "NA_robustness")
|
||||
|
||||
data <- readRDS(file.path(
|
||||
data_folder,
|
||||
"NA_robustness_19-04-2024_15-18-55_uniform_nested_1-200.Rds"
|
||||
))
|
||||
|
||||
data[["model"]] <- factor(data[["model"]], levels = c("iid", "pi", "rho", "pirho"))
|
||||
|
||||
max_repetition <- max(data$repetition)
|
||||
|
||||
# Averaging over repetitions
|
||||
averaged_data <- data %>%
|
||||
group_by(prop_NAs, model) %>%
|
||||
select(-repetition) %>%
|
||||
summarise_all(list(mean = mean, sd = sd))
|
||||
|
||||
# Preparing auc_data
|
||||
auc_data <- averaged_data %>%
|
||||
select(c(prop_NAs, model) | contains("auc_")) %>%
|
||||
rename_with(~ gsub("auc_", "", .x, fixed = TRUE)) %>%
|
||||
filter(prop_NAs != 0)
|
||||
|
||||
auc_data_long <-
|
||||
bind_cols(
|
||||
auc_data %>% select(c("prop_NAs", "model") | contains("_mean")) %>%
|
||||
pivot_longer(!c(prop_NAs, model),
|
||||
names_to = c("method"),
|
||||
values_to = "auc_mean"
|
||||
),
|
||||
auc_data %>% select(c("prop_NAs", "model") | contains("_sd")) %>%
|
||||
pivot_longer(!c(prop_NAs, model),
|
||||
names_to = NULL,
|
||||
values_to = "auc_sd"
|
||||
) %>% ungroup() %>% select(!c("prop_NAs", "model"))
|
||||
) %>% mutate(method = method %>% gsub(
|
||||
pattern = "_mean",
|
||||
replacement = "", fixed = TRUE
|
||||
))
|
||||
# Preparing ARI data
|
||||
ari_data <- averaged_data %>% select(c(prop_NAs, model) | contains("ari"))
|
||||
|
||||
ari_data_long <- bind_cols(
|
||||
ari_data %>% select(c("prop_NAs", "model") | contains("_mean")) %>%
|
||||
pivot_longer(!c(prop_NAs, model),
|
||||
names_to = c("method"),
|
||||
values_to = "ari_mean"
|
||||
),
|
||||
ari_data %>% select(c("prop_NAs", "model") | contains("_sd")) %>%
|
||||
pivot_longer(!c(prop_NAs, model),
|
||||
names_to = NULL,
|
||||
values_to = "ari_sd"
|
||||
) %>% ungroup() %>% select(!c("prop_NAs", "model"))
|
||||
) %>%
|
||||
separate(method, into = c("dim", "method", "useless"), sep = "_") %>%
|
||||
select(-useless)
|
||||
|
||||
auc_plot <- ggplot(auc_data_long) +
|
||||
aes(x = prop_NAs, y = auc_mean) +
|
||||
geom_line(aes(color = method)) +
|
||||
geom_point(aes(color = method)) +
|
||||
geom_ribbon(aes(ymin = auc_mean - auc_sd, ymax = auc_mean + auc_sd, fill = method), alpha = 0.2) +
|
||||
ylim(c(min(auc_data_long[["auc_mean"]]), max(auc_data_long[["auc_mean"]]))) +
|
||||
scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) +
|
||||
ylab(TeX("\\bar{AUC}")) +
|
||||
xlab("NA proportion") +
|
||||
labs(fill = "Method", color = "Method") +
|
||||
ggtitle(TeX(paste(
|
||||
"$\\bar{AUC}\\pm s_{AUC}$", ", function of NA proportion. N=", max_repetition
|
||||
))) +
|
||||
facet_grid(rows = vars(model), as.table = TRUE) +
|
||||
theme_bw()
|
||||
|
||||
|
||||
ari_plot <- ggplot(ari_data_long) +
|
||||
aes(x = prop_NAs, y = ari_mean) +
|
||||
geom_line(aes(color = method)) +
|
||||
geom_point(aes(color = method)) +
|
||||
geom_ribbon(aes(ymin = ari_mean - ari_sd, ymax = ari_mean + ari_sd, fill = method), alpha = 0.2) +
|
||||
scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) +
|
||||
ylab(TeX("$\\bar{ARI^d}$")) +
|
||||
xlab("NA proportion") +
|
||||
labs(fill = "Method", color = "Method") +
|
||||
ggtitle(TeX(paste(
|
||||
"$\\bar{ARI^d}\\pm s_{ARI^d}$", ", function of NA proportion. N=", max_repetition
|
||||
))) +
|
||||
facet_grid(rows = vars(model), cols = vars(dim)) +
|
||||
theme_bw()
|
||||
|
||||
(auc_plot | ari_plot) + patchwork::plot_layout(guides = "collect")
|
||||
183
code/analysis/analyze_NA_robustness.Rmd
Normal file
183
code/analysis/analyze_NA_robustness.Rmd
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
---
|
||||
title: "Analysis NA robustness"
|
||||
output:
|
||||
bookdown::html_document2:
|
||||
toc: true
|
||||
fig_caption: true
|
||||
---
|
||||
|
||||
```{r knitropts, echo = FALSE}
|
||||
knitr::opts_chunk$set(fig.width=12L, fig.height=10L)
|
||||
```
|
||||
|
||||
```{r imports, echo = FALSE, include=FALSE}
|
||||
suppressPackageStartupMessages(library(ggplot2))
|
||||
suppressPackageStartupMessages(library(tidyverse))
|
||||
suppressPackageStartupMessages(library(latex2exp))
|
||||
suppressPackageStartupMessages(library(here))
|
||||
loadNamespace(package = "patchwork")
|
||||
```
|
||||
|
||||
```{r data, echo = FALSE}
|
||||
# Loading data
|
||||
data_folder <- file.path(here(), "code", "results", "simulations", "NA_robustness")
|
||||
files_list <- list.files(data_folder, pattern = "^NA_robustness_19-04-2024")
|
||||
```
|
||||
```{r df_func, echo = FALSE}
|
||||
prepare_dataframes_auc_ari <- function(data) {
|
||||
data[["model"]] <- factor(data[["model"]], levels = c("iid", "pi", "rho", "pirho"))
|
||||
|
||||
max_repetition <- max(data[["repetition"]])
|
||||
sampling <- unique(data[["sampling"]])
|
||||
struct <- unique(data[["struct"]])
|
||||
|
||||
# Averaging over repetitions
|
||||
averaged_data <- data %>%
|
||||
group_by(prop_NAs, model) %>%
|
||||
select(-c(repetition, sampling, struct)) %>%
|
||||
summarise_all(list(mean = mean, sd = sd))
|
||||
|
||||
# Preparing auc_data
|
||||
auc_data <- averaged_data %>%
|
||||
select(c(prop_NAs, model) | contains("auc_")) %>%
|
||||
rename_with(~ gsub("auc_", "", .x, fixed = TRUE)) %>%
|
||||
filter(prop_NAs != 0)
|
||||
|
||||
auc_data_long <-
|
||||
bind_cols(
|
||||
auc_data %>%
|
||||
select(c("prop_NAs", "model") | contains("_mean")) %>%
|
||||
pivot_longer(!c(prop_NAs, model),
|
||||
names_to = c("method"),
|
||||
values_to = "auc_mean"
|
||||
),
|
||||
auc_data %>%
|
||||
select(c("prop_NAs", "model") | contains("_sd")) %>%
|
||||
pivot_longer(!c(prop_NAs, model),
|
||||
names_to = NULL,
|
||||
values_to = "auc_sd"
|
||||
) %>%
|
||||
ungroup() %>%
|
||||
select(!c("prop_NAs", "model"))
|
||||
) %>%
|
||||
mutate(method = method %>%
|
||||
gsub(
|
||||
pattern = "_mean",
|
||||
replacement = "", fixed = TRUE
|
||||
))
|
||||
# Preparing ARI data
|
||||
ari_data <- averaged_data %>% select(c(prop_NAs, model) | contains("ari"))
|
||||
|
||||
ari_data_long <- bind_cols(
|
||||
ari_data %>%
|
||||
select(c("prop_NAs", "model") | contains("_mean")) %>%
|
||||
pivot_longer(!c(prop_NAs, model),
|
||||
names_to = c("method"),
|
||||
values_to = "ari_mean"
|
||||
),
|
||||
ari_data %>%
|
||||
select(c("prop_NAs", "model") | contains("_sd")) %>%
|
||||
pivot_longer(!c(prop_NAs, model),
|
||||
names_to = NULL,
|
||||
values_to = "ari_sd"
|
||||
) %>%
|
||||
ungroup() %>%
|
||||
select(!c("prop_NAs", "model"))
|
||||
) %>%
|
||||
separate(method, into = c("dim", "method", "useless"), sep = "_") %>%
|
||||
select(-useless)
|
||||
|
||||
return(list(
|
||||
auc_data_long = auc_data_long, ari_data_long = ari_data_long,
|
||||
auc_data = auc_data, ari_data = ari_data,
|
||||
max_repetition = max_repetition,
|
||||
sampling = sampling,
|
||||
struct = struct
|
||||
))
|
||||
}
|
||||
```
|
||||
```{r plot-func, echo = FALSE}
|
||||
plot_auc_ari_data <- function(df_list) {
|
||||
auc_data_long <- df_list[["auc_data_long"]]
|
||||
ari_data_long <- df_list[["ari_data_long"]]
|
||||
max_repetition <- df_list[["max_repetition"]]
|
||||
sampling <- df_list[["sampling"]]
|
||||
struct <- df_list[["struct"]]
|
||||
|
||||
auc_plot <- ggplot(auc_data_long) +
|
||||
aes(x = prop_NAs, y = auc_mean) +
|
||||
geom_line(aes(color = method)) +
|
||||
geom_point(aes(color = method)) +
|
||||
geom_ribbon(aes(ymin = auc_mean - auc_sd, ymax = auc_mean + auc_sd, fill = method), alpha = 0.2) +
|
||||
ylim(c(min(auc_data_long[["auc_mean"]]), max(auc_data_long[["auc_mean"]]))) +
|
||||
scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) +
|
||||
ylab(TeX("\\bar{AUC}")) +
|
||||
xlab("NA proportion") +
|
||||
labs(fill = "Method", color = "Method") +
|
||||
ggtitle(TeX(paste(
|
||||
"$\\bar{AUC}\\pm s_{AUC}$", ", function of NA proportion. N=", max_repetition
|
||||
))) +
|
||||
facet_grid(rows = vars(model), as.table = TRUE) +
|
||||
theme_bw()
|
||||
|
||||
|
||||
ari_plot <- ggplot(ari_data_long) +
|
||||
aes(x = prop_NAs, y = ari_mean) +
|
||||
geom_line(aes(color = method)) +
|
||||
geom_point(aes(color = method)) +
|
||||
geom_ribbon(aes(ymin = ari_mean - ari_sd, ymax = ari_mean + ari_sd, fill = method), alpha = 0.2) +
|
||||
scale_x_continuous(breaks = scales::pretty_breaks(n = 10L)) +
|
||||
ylab(TeX("$\\bar{ARI^d}$")) +
|
||||
xlab("NA proportion") +
|
||||
labs(fill = "Method", color = "Method") +
|
||||
ggtitle(TeX(paste(
|
||||
"$\\bar{ARI^d}\\pm s_{ARI^d}$", ", function of NA proportion. N=", max_repetition
|
||||
))) +
|
||||
facet_grid(rows = vars(model), cols = vars(dim)) +
|
||||
theme_bw()
|
||||
|
||||
return((auc_plot | ari_plot) +
|
||||
patchwork::plot_layout(guides = "collect") +
|
||||
patchwork::plot_annotation(
|
||||
title = "Metrics in function of NA proportions",
|
||||
subtitle = paste0(
|
||||
"With N = ", max_repetition, ", a ", struct,
|
||||
" structure and a ", sampling, " sampling"
|
||||
), tag_levels = "A"
|
||||
))
|
||||
}
|
||||
```
|
||||
|
||||
```{r prep-fig-caps, echo = FALSE}
|
||||
figcaps <- sapply(files_list, function(filename) {
|
||||
data <- readRDS(file.path(
|
||||
data_folder,
|
||||
filename
|
||||
))
|
||||
|
||||
max_repetition <- max(data[["repetition"]])
|
||||
sampling <- unique(data[["sampling"]])
|
||||
struct <- unique(data[["struct"]])
|
||||
|
||||
return(paste(struct, "structure with", sampling, "sampling and N =", max_repetition, "repetitions"))
|
||||
})
|
||||
```
|
||||
|
||||
```{r plot, echo = FALSE, fig.cap=paste("Graph of metrics for NA robustness with a ", figcaps)}
|
||||
for (filename in files_list) {
|
||||
data <- readRDS(file.path(
|
||||
data_folder,
|
||||
filename
|
||||
))
|
||||
df_list <- prepare_dataframes_auc_ari(data = data)
|
||||
plot(plot_auc_ari_data(df_list = df_list))
|
||||
}
|
||||
# filename <- files_list[1]
|
||||
# data <- readRDS(file.path(
|
||||
# data_folder,
|
||||
# filename
|
||||
# ))
|
||||
# print(here())
|
||||
# df_list <- prepare_dataframes_auc_ari(data = data)
|
||||
# knitr::knit_print(plot_auc_ari_data(df_list = df_list))
|
||||
```
|
||||
Binary file not shown.
Loading…
Add table
Reference in a new issue