report : script to generate inference figure for report

This commit is contained in:
Louis Lacoste 2024-07-11 23:55:12 +02:00
parent 81199691e8
commit 5d130c3adf

View file

@ -5,16 +5,11 @@ library("tidyr")
library("dplyr")
library("stringr")
library("knitr")
library("pander")
library("patchwork")
library("latex2exp")
library("stringr")
library("here")
library("tikzDevice")
## ----setup, echo = FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------
options(dplyr.summarise.inform = FALSE)
knitr::opts_knit$set(kable.force.latex = TRUE)
meanse <- function(x, ...) {
mean1 <- signif(round(mean(x, na.rm = T), 2), 5) # calculate mean and round
se1 <- signif(round(sd(x, na.rm = T) / sqrt(sum(!is.na(x))), 2), 2) # std error - round adding zeros
@ -31,21 +26,36 @@ meanse <- function(x, ...) {
## ----import-data, echo = FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------
filenames <- list.files(
path = "./data/",
pattern = "inference_testing_2023-07-*",
path = here("code", "results", "simulations", "inference", "bernoulli"),
# pattern = "",
full.names = TRUE
)
data_list <- lapply(filenames, readRDS)
filenames <- filenames[order(file.info(filenames)$ctime, decreasing = TRUE)]
col_id_BICLS <- c(11, 16, 23, 30, 37)
result_data_frame <- dplyr::bind_rows(data_list)
result_data_frame <- readRDS(filenames[1])
# Compute the preferred model
result_data_frame <- cbind(result_data_frame, preferred_model = sapply(seq_len(nrow(result_data_frame)), function(n) sub("_BICL", "", names(which.max(result_data_frame[n, col_id_BICLS])))))
result_data_frame$preferred_model <- factor(result_data_frame$preferred_model, levels = c(
"sep", "iid", "pi",
"rho", "pirho"
))
result_data_frame <- result_data_frame %>%
mutate(
iid_Q1 = as.integer(iid_Q1 == 4L),
iid_Q2 = as.integer(iid_Q2 == 4L),
pi_Q1 = as.integer(pi_Q1 == 4L),
pi_Q2 = as.integer(pi_Q2 == 4L),
rho_Q1 = as.integer(rho_Q1 == 4L),
rho_Q2 = as.integer(rho_Q2 == 4L),
pirho_Q1 = as.integer(pirho_Q1 == 4L),
pirho_Q2 = as.integer(pirho_Q2 == 4L)
)
## ----inference_table, echo = FALSE---------------------------------------------------------------------------------------------------------------------------------------------------
averaged_data <- result_data_frame %>%
@ -57,15 +67,19 @@ averaged_data <- averaged_data %>%
arr.ind = TRUE
))
## ----function_per_model, echo = FALSE------------------------------------------------------------------------------------------------------------------------------------------------
dataframe_per_model <- function(model) {
averaged_data %>%
select(epsilon_alpha, starts_with(paste0(model, "_")))
}
## ----per_model_table, echo = FALSE, results='asis', message=FALSE, warning = FALSE---------------------------------------------------------------------------------------------------
dir.create(here(
"mia-rapport-2024", "tables", "simulations",
"inference"
), recursive = TRUE)
for (model in c("sep", "iid", "pi", "rho", "pirho")) {
kable_colnames <- c(
"$\\eps[\\alpha]$", # "BIC-L",
@ -75,8 +89,8 @@ for (model in c("sep", "iid", "pi", "rho", "pirho")) {
model_name <- model
if (model != "sep") {
kable_colnames <- c(
kable_colnames, "Recovered $Q_1$",
"Recovered $Q_2$"
kable_colnames, "$\\mathbbb{1}_{\\widehat{Q_1}=Q_1}$",
"$\\mathbbb{1}_{\\widehat{Q_2}=Q_2}$"
)
}
if (model == "pirho") {
@ -88,7 +102,7 @@ for (model in c("sep", "iid", "pi", "rho", "pirho")) {
model_name <- paste0("$", model, "$")
}
}
print(kable(dataframe_per_model(model),
cat(kable(dataframe_per_model(model),
escape = FALSE,
booktabs = TRUE,
digits = 2,
@ -98,7 +112,11 @@ for (model in c("sep", "iid", "pi", "rho", "pirho")) {
"}Quality metrics for ",
ifelse(model != "sep", paste0(model_name, "$\\text{-}colBiSBM$"), "$sep\\text{-}BiSBM$")
),
col.names = kable_colnames
col.names = kable_colnames,
format = "latex"
), file = here(
"mia-rapport-2024", "tables", "simulations",
"inference", paste0(model, ".tex")
))
}
@ -117,7 +135,7 @@ proportion_preferred_table <- proportion_preferred_data %>%
values_from = prop_model, values_fill = 0
)
kable(proportion_preferred_table,
cat(kable(proportion_preferred_table,
escape = FALSE,
booktabs = TRUE,
digits = 2,
@ -133,7 +151,10 @@ kable(proportion_preferred_table,
),
align = "rccccc",
format = "latex"
)
), file = here(
"mia-rapport-2024", "tables", "simulations",
"inference", "preferred.tex"
))
## ----proportion_preferred_figure, echo = FALSE---------------------------------------------------------------------------------------------------------------------------------------
@ -143,20 +164,35 @@ kable(proportion_preferred_table,
#| fig.width = 7,
#| fig.height = 4,
#| dpi=300
output_tikz_folder <- here("mia-rapport-2024", "tikz", "simulations", "inference")
if (!dir.exists(output_tikz_folder)) {
dir.create(output_tikz_folder, recursive = TRUE)
}
plot <- proportion_preferred_data %>% ggplot() +
tikz(
file = file.path(output_tikz_folder, "model-proportions.tex"), width = 4L,
height = 4L
)
levels(proportion_preferred_data$preferred_model) <- c(
"sep", "iid", "$\\pi$", "$\\rho$",
"$\\pi\\rho$"
)
plot <- proportion_preferred_data %>%
ggplot() +
aes(
x = epsilon_alpha, y = prop_model, color = preferred_model,
fill = preferred_model
) +
guides(
fill = guide_legend(title = "Preferred Model"),
color = guide_legend(title = "Preferred Model")
fill = guide_legend(title = "Preferred\nModel"),
color = guide_legend(title = "Preferred\nModel")
) +
scale_x_continuous(breaks = seq(from = 0.0, to = 0.24, by = 0.03)) +
scale_color_okabe_ito() +
scale_fill_okabe_ito() +
xlab(TeX("$\\epsilon_{\\alpha}$")) +
ylab("Model proportions") +
xlab("$\\epsilon_{\\alpha}$") +
ylab("Preferred model proportions") +
theme(aspect.ratio = 1L, axis.text.x = element_text(angle = -45, vjust = .5, hjust = 1)) +
geom_col(position = "stack")
print(plot)
dev.off()