report : preparing for report
This commit is contained in:
parent
44bda5e683
commit
dfb0d71a82
4 changed files with 326 additions and 1 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -10,3 +10,5 @@ analyze_inference_bernoulli_*/
|
|||
*.e[1-9]*
|
||||
*.pe[1-9]*
|
||||
*.po[1-9]*
|
||||
|
||||
/.luarc.json
|
||||
|
|
|
|||
162
code/for_report/inference_analyze.R
Normal file
162
code/for_report/inference_analyze.R
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
## ----libraries, echo = FALSE, include = FALSE----------------------------------------------------------------------------------------------------------------------------------------
|
||||
library("ggplot2")
|
||||
library("ggokabeito")
|
||||
library("tidyr")
|
||||
library("dplyr")
|
||||
library("stringr")
|
||||
library("knitr")
|
||||
library("pander")
|
||||
library("patchwork")
|
||||
library("latex2exp")
|
||||
library("here")
|
||||
|
||||
|
||||
## ----setup, echo = FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
options(dplyr.summarise.inform = FALSE)
|
||||
knitr::opts_knit$set(kable.force.latex = TRUE)
|
||||
|
||||
meanse <- function(x, ...) {
|
||||
mean1 <- signif(round(mean(x, na.rm = T), 2), 5) # calculate mean and round
|
||||
se1 <- signif(round(sd(x, na.rm = T) / sqrt(sum(!is.na(x))), 2), 2) # std error - round adding zeros
|
||||
out <- paste(mean1, "$\\pm$", se1) # paste together mean plus/minus and standard error
|
||||
if (str_detect(out, "NA")) {
|
||||
out <- "NA"
|
||||
} # if missing do not add plusminus
|
||||
if (se1 == 0) {
|
||||
out <- paste(mean1)
|
||||
}
|
||||
return(out)
|
||||
}
|
||||
|
||||
|
||||
## ----import-data, echo = FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
filenames <- list.files(
|
||||
path = "./data/",
|
||||
pattern = "inference_testing_2023-07-*",
|
||||
full.names = TRUE
|
||||
)
|
||||
data_list <- lapply(filenames, readRDS)
|
||||
col_id_BICLS <- c(11, 16, 23, 30, 37)
|
||||
result_data_frame <- dplyr::bind_rows(data_list)
|
||||
|
||||
# Compute the preferred model
|
||||
result_data_frame <- cbind(result_data_frame, preferred_model = sapply(seq_len(nrow(result_data_frame)), function(n) sub("_BICL", "", names(which.max(result_data_frame[n, col_id_BICLS])))))
|
||||
result_data_frame$preferred_model <- factor(result_data_frame$preferred_model, levels = c(
|
||||
"sep", "iid", "pi",
|
||||
"rho", "pirho"
|
||||
))
|
||||
|
||||
|
||||
## ----inference_table, echo = FALSE---------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
averaged_data <- result_data_frame %>%
|
||||
group_by(epsilon_alpha) %>%
|
||||
summarise(across(-preferred_model, list("avrg" = meanse))) %>%
|
||||
select(-c(2:10))
|
||||
averaged_data <- averaged_data %>%
|
||||
select(which(!grepl("*_BICL_*", colnames(averaged_data)),
|
||||
arr.ind = TRUE
|
||||
))
|
||||
|
||||
|
||||
## ----function_per_model, echo = FALSE------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
dataframe_per_model <- function(model) {
|
||||
averaged_data %>%
|
||||
select(epsilon_alpha, starts_with(paste0(model, "_")))
|
||||
}
|
||||
|
||||
|
||||
## ----per_model_table, echo = FALSE, results='asis', message=FALSE, warning = FALSE---------------------------------------------------------------------------------------------------
|
||||
for (model in c("sep", "iid", "pi", "rho", "pirho")) {
|
||||
kable_colnames <- c(
|
||||
"$\\eps[\\alpha]$", # "BIC-L",
|
||||
"$\\overline{\\text{ARI}}_{1}$",
|
||||
"$\\overline{\\text{ARI}}_{2}$", "$\\text{ARI}_{1}$", "$\\text{ARI}_{2}$"
|
||||
)
|
||||
model_name <- model
|
||||
if (model != "sep") {
|
||||
kable_colnames <- c(
|
||||
kable_colnames, "Recovered $Q_1$",
|
||||
"Recovered $Q_2$"
|
||||
)
|
||||
}
|
||||
if (model == "pirho") {
|
||||
model_name <- "$\\pi\\rho$"
|
||||
} else {
|
||||
if (model != "iid" && model != "sep") {
|
||||
model_name <- paste0("$\\", model, "$")
|
||||
} else {
|
||||
model_name <- paste0("$", model, "$")
|
||||
}
|
||||
}
|
||||
print(kable(dataframe_per_model(model),
|
||||
escape = FALSE,
|
||||
booktabs = TRUE,
|
||||
digits = 2,
|
||||
position = "!h",
|
||||
caption = paste0(
|
||||
"\\label{tab:per_model_", model,
|
||||
"}Quality metrics for ",
|
||||
ifelse(model != "sep", paste0(model_name, "$\\text{-}colBiSBM$"), "$sep\\text{-}BiSBM$")
|
||||
),
|
||||
col.names = kable_colnames
|
||||
))
|
||||
}
|
||||
|
||||
|
||||
## ----proportion-preferred_model, echo = FALSE----------------------------------------------------------------------------------------------------------------------------------------
|
||||
proportion_preferred_data <- result_data_frame %>%
|
||||
group_by(epsilon_alpha, preferred_model) %>%
|
||||
summarise(n = n()) %>%
|
||||
mutate(prop_model = n / sum(n)) %>%
|
||||
ungroup() %>%
|
||||
select(-n)
|
||||
|
||||
proportion_preferred_table <- proportion_preferred_data %>%
|
||||
pivot_wider(
|
||||
names_from = preferred_model,
|
||||
values_from = prop_model, values_fill = 0
|
||||
)
|
||||
|
||||
kable(proportion_preferred_table,
|
||||
escape = FALSE,
|
||||
booktabs = TRUE,
|
||||
digits = 2,
|
||||
position = "!h",
|
||||
caption = "\\label{tab:proportion-preferred-table}Proportions of models selected per \\eps[\\alpha] (data for Figure \\ref{fig:inference-proportion-preferred})",
|
||||
col.names = c(
|
||||
"\\eps[\\alpha]",
|
||||
"$sep\\text{-}BiSBM$",
|
||||
"$iid\\text{-}colBiSBM$",
|
||||
"$\\pi\\text{-}colBiSBM$",
|
||||
"$\\rho\\text{-}colBiSBM$",
|
||||
"$\\pi\\rho\\text{-}colBiSBM$"
|
||||
),
|
||||
align = "rccccc",
|
||||
format = "latex"
|
||||
)
|
||||
|
||||
|
||||
## ----proportion_preferred_figure, echo = FALSE---------------------------------------------------------------------------------------------------------------------------------------
|
||||
#| fig.cap="\\label{fig:inference-proportion-preferred}Plot of the proportions of different preferred models in function of \\eps[\\alpha]",
|
||||
#| fig.asp = 0.5,
|
||||
#| fig.pos = "H",
|
||||
#| fig.width = 7,
|
||||
#| fig.height = 4,
|
||||
#| dpi=300
|
||||
|
||||
plot <- proportion_preferred_data %>% ggplot() +
|
||||
aes(
|
||||
x = epsilon_alpha, y = prop_model, color = preferred_model,
|
||||
fill = preferred_model
|
||||
) +
|
||||
guides(
|
||||
fill = guide_legend(title = "Preferred Model"),
|
||||
color = guide_legend(title = "Preferred Model")
|
||||
) +
|
||||
scale_x_continuous(breaks = seq(from = 0.0, to = 0.24, by = 0.03)) +
|
||||
scale_color_okabe_ito() +
|
||||
scale_fill_okabe_ito() +
|
||||
xlab(TeX("$\\epsilon_{\\alpha}$")) +
|
||||
ylab("Model proportions") +
|
||||
geom_col(position = "stack")
|
||||
print(plot)
|
||||
161
code/for_report/model_selection_analyze.R
Normal file
161
code/for_report/model_selection_analyze.R
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
## ----libraries, echo = FALSE, include = FALSE-----------------------------------------------------------------------------------------------------------------------
|
||||
require("ggplot2")
|
||||
require("ggokabeito")
|
||||
require("knitr")
|
||||
require("kableExtra")
|
||||
require("stringr")
|
||||
require("tidyr")
|
||||
require("dplyr")
|
||||
require("patchwork")
|
||||
require("latex2exp")
|
||||
|
||||
|
||||
## ----setup, echo = FALSE, include= FALSE----------------------------------------------------------------------------------------------------------------------------
|
||||
options(knitr.table.format = "latex")
|
||||
|
||||
meanse <- function(x, ...) {
|
||||
mean1 <- signif(round(mean(x, na.rm = T), 2), 5) # calculate mean and round
|
||||
se1 <- signif(round(sd(x, na.rm = T) / sqrt(sum(!is.na(x))), 2), 2) # std error - round adding zeros
|
||||
out <- paste(mean1, "$\\pm$", se1) # paste together mean plus/minus and standard error
|
||||
if (str_detect(out, "NA")) {
|
||||
out <- "NA"
|
||||
} # if missing do not add plusminus
|
||||
if (se1 == 0) {
|
||||
out <- paste(mean1)
|
||||
}
|
||||
return(out)
|
||||
}
|
||||
|
||||
|
||||
## ----import-data, echo = FALSE--------------------------------------------------------------------------------------------------------------------------------------
|
||||
filenames <- list.files(
|
||||
path = "./data/",
|
||||
pattern = "model_selection_check_batch_report_3_rep_*",
|
||||
full.names = TRUE
|
||||
)
|
||||
data_list <- lapply(filenames, readRDS)
|
||||
result_data_frame <- dplyr::bind_rows(data_list)
|
||||
|
||||
result_data_frame$preferred_model <- factor(result_data_frame$preferred_model, levels = c(
|
||||
"sep","iid","pi",
|
||||
"rho","pirho"
|
||||
))
|
||||
|
||||
# Adding a column accounting for true model iid, pi, rho or pirho
|
||||
# result_data_frame <- result_data_frame %>% mutate(true_model = if (all( c(epsilon_pi >0, epsilon_rho > 0) == c(TRUE, TRUE))) print("pirho") else if (all( c(epsilon_pi >0, epsilon_rho > 0) == c(TRUE, FALSE))) print("pi") else if (all( c(epsilon_pi >0, epsilon_rho > 0) == c(F, T))) print("rho") else print("iid"))
|
||||
|
||||
|
||||
|
||||
## ----compute-table, echo = FALSE, include = FALSE-------------------------------------------------------------------------------------------------------------------
|
||||
model_comparison_eps_pi_rho <- result_data_frame %>%
|
||||
group_by(epsilon_pi, epsilon_rho, preferred_model) %>%
|
||||
summarise(n = n()) %>%
|
||||
mutate(prop_model = n / sum(n))
|
||||
|
||||
model_comparison_eps_pi <- result_data_frame %>%
|
||||
group_by(epsilon_pi, preferred_model) %>%
|
||||
summarise(n = n(), rec_Q1 = mean(iid_Q1 + pi_Q1 + rho_Q1 + pirho_Q1)/4) %>%
|
||||
mutate(prop_model = n / sum(n))
|
||||
|
||||
model_comparison_eps_rho <- result_data_frame %>%
|
||||
group_by(epsilon_rho, preferred_model) %>%
|
||||
summarise(n = n(), rec_Q2 = mean(iid_Q2 + pi_Q2 + rho_Q2 + pirho_Q2)/4) %>%
|
||||
mutate(prop_model = n / sum(n))
|
||||
|
||||
|
||||
## ----epsilon_plot, echo = FALSE, include = FALSE--------------------------------------------------------------------------------------------------------------------
|
||||
#| fig.asp = 0.5,
|
||||
#| fig.pos = "H",
|
||||
#| fig.width = 7,
|
||||
#| fig.height = 4,
|
||||
#| dpi=300
|
||||
|
||||
plot_pi <- model_comparison_eps_pi %>% ggplot() +
|
||||
aes(
|
||||
x = epsilon_pi, y = prop_model, color = preferred_model,
|
||||
fill = preferred_model
|
||||
) +
|
||||
guides(fill = guide_legend(title = "Preferred Model"),
|
||||
color = guide_legend(title = "Preferred Model")) +
|
||||
xlab(TeX("$\\epsilon_{\\pi}$")) +
|
||||
ylab("Model proportions") +
|
||||
scale_color_okabe_ito() +
|
||||
scale_fill_okabe_ito() +
|
||||
geom_col(position = "stack")
|
||||
plot_rho <- model_comparison_eps_rho %>% ggplot() +
|
||||
aes(
|
||||
x = epsilon_rho, y = prop_model, color = preferred_model,
|
||||
fill = preferred_model
|
||||
) +
|
||||
guides(fill = guide_legend(title = "Preferred Model"),
|
||||
color = guide_legend(title = "Preferred Model")) +
|
||||
xlab(TeX("$\\epsilon_{\\rho}$")) +
|
||||
ylab("") +
|
||||
scale_color_okabe_ito() +
|
||||
scale_fill_okabe_ito() +
|
||||
geom_col(position = "stack")
|
||||
|
||||
ggsave("./img/plot_model_function_eps.png", plot_pi + plot_rho +
|
||||
plot_layout(guides = "collect"))
|
||||
|
||||
|
||||
## ----tables, echo = FALSE, results='asis'---------------------------------------------------------------------------------------------------------------------------
|
||||
kable(
|
||||
(model_comparison_eps_pi %>%
|
||||
select(-one_of("n")) %>%
|
||||
pivot_wider(
|
||||
names_from = preferred_model,
|
||||
values_from = prop_model,
|
||||
values_fill = 0
|
||||
) %>% group_by(epsilon_pi) %>%
|
||||
summarise(
|
||||
rec_Q1 = meanse(rec_Q1),
|
||||
iid = sum(iid),
|
||||
pi = sum(pi),
|
||||
rho = sum(rho),
|
||||
pirho = sum(pirho)
|
||||
))[, c(1, 3:6, 2)],
|
||||
digits = 2,
|
||||
col.names = c(
|
||||
"$\\eps[\\pi]$",
|
||||
"$iid\\text{-}colBiSBM$ ",
|
||||
"$\\pi\\text{-}colBiSBM$",
|
||||
"$\\rho\\text{-}colBiSBM$",
|
||||
"$\\pi\\rho\\text{-}colBiSBM$",
|
||||
"Recovered $Q_1$"
|
||||
), align = "lcccc",
|
||||
booktab = TRUE,
|
||||
position = "!h",
|
||||
escape = FALSE,
|
||||
caption = "\\label{tab:pi-model-sel}Model selection for varying $\\pi$ mixture parameters",
|
||||
format = "latex"
|
||||
) %>% kableExtra::add_header_above(c(" "=1,"Models"=4,"Blocks"=1))
|
||||
kable(
|
||||
(model_comparison_eps_rho %>%
|
||||
select(-one_of("n")) %>%
|
||||
pivot_wider(
|
||||
names_from = preferred_model,
|
||||
values_from = prop_model,
|
||||
values_fill = 0
|
||||
) %>% group_by(epsilon_rho) %>%
|
||||
summarise(rec_Q2 = meanse(rec_Q2),
|
||||
iid = sum(iid),
|
||||
pi = sum(pi),
|
||||
rho = sum(rho),
|
||||
pirho = sum(pirho)))[,c(1,3:6, 2)],
|
||||
digits = 2,
|
||||
col.names = c(
|
||||
"$\\eps[\\rho]$",
|
||||
"$iid\\text{-}colBiSBM$ ",
|
||||
"$\\pi\\text{-}colBiSBM$",
|
||||
"$\\rho\\text{-}colBiSBM$",
|
||||
"$\\pi\\rho\\text{-}colBiSBM$",
|
||||
"Recovered $Q_2$"
|
||||
), align = "lcccc",
|
||||
booktab = TRUE,
|
||||
position = "!h",
|
||||
escape = FALSE,
|
||||
caption = "\\label{tab:rho-model-sel}Model selection for varying $\\rho$ mixture parameters",
|
||||
format = "latex"
|
||||
) %>% kableExtra::add_header_above(c(" "=1,"Models"=4,"Blocks"=1))
|
||||
|
||||
|
|
@ -1 +1 @@
|
|||
Subproject commit 43485078a719b65f23009ecf5b2112cb8f29ff22
|
||||
Subproject commit e412e0015127b1361bb7cee42d4f84dc98121cce
|
||||
Loading…
Add table
Reference in a new issue