From ae8560aebc57517e1a062e5a1223272ee1f48b2f Mon Sep 17 00:00:00 2001 From: Louis Lacoste Date: Wed, 26 Jun 2024 17:16:06 +0200 Subject: [PATCH] analysis : model_selection table and time --- code/analysis/simulations/model_selection.qmd | 101 ++++++++++++++++++ code/analysis/simulations/parse-latex.lua | 32 ++++++ 2 files changed, 133 insertions(+) create mode 100644 code/analysis/simulations/model_selection.qmd create mode 100644 code/analysis/simulations/parse-latex.lua diff --git a/code/analysis/simulations/model_selection.qmd b/code/analysis/simulations/model_selection.qmd new file mode 100644 index 0000000..7d37638 --- /dev/null +++ b/code/analysis/simulations/model_selection.qmd @@ -0,0 +1,101 @@ +--- +title: Model selection analysis +author: Louis Lacoste +filters: + - parse-latex.lua +header-includes: + - \usepackage{booktabs} + - \usepackage{longtable} +execute: + warning: false + echo: false +--- +```{r setup} +options(xtable.comment=FALSE) +``` +```{r libraries} +library(here) +library(dplyr) +library(tidyr) +library(ggplot2) +``` + +```{r import data} +data_folder <- here("code", "results", "simulations", "model_selection") +file_list <- file.info(file.path(data_folder, list.files(data_folder))) + +# Tri par date de création +file_list <- file_list %>% arrange(ctime) +file_list[["filepath"]] <- rownames(file_list) + +data_file <- file_list[["filepath"]][1] + +data <- readRDS(data_file) + +data[["preferred_model"]] <- factor(data[["preferred_model"]], levels = c("sep", "iid", "pi", "rho", "pirho")) + +latex_ready_data <- data %>% select(epsilon_pi, epsilon_rho, preferred_model) +names(latex_ready_data) <- c("$\\epsilon_{\\pi}$", "$\\epsilon_{\\rho}$", "preferred model") +``` + +```{r temps_calcul} +time_data <- data %>% + group_by(epsilon_pi, epsilon_rho) %>% + select(-(pi2.1:preferred_model)) %>% + mutate(elapsed_secs = as.numeric(elapsed_secs)) + +ggplot(time_data) + + aes( + x = as.factor(epsilon_rho), + y = elapsed_secs, color = as.factor(epsilon_pi) + ) + + geom_boxplot() + +ggplot(time_data) + + aes( + x = as.factor(epsilon_pi), + y = elapsed_secs, color = as.factor(epsilon_rho) + ) + + geom_boxplot() + +``` + +Ainsi les temps de calculs ne semblent pas augmenter de beaucoup avec des +modèles aux proportions plus éloignées. + +```{r} +bloc_recov_data <- data %>% + group_by(epsilon_pi, epsilon_rho) %>% + select( + -(pi2.1:rho2.3), -repetition, -elapsed_secs, + -contains("BICL"), -preferred_model + ) %>% + mutate_at(vars(contains("Q")), function(Q) as.numeric(Q == 3)) %>% + summarise_at(vars(contains("Q")), mean) +preferred_model_data <- data %>% + group_by(epsilon_pi, epsilon_rho) %>% + select( + -(pi2.1:rho2.3), -repetition, -elapsed_secs, + -contains("BICL"), -contains("Q") + ) %>% + mutate( + sep_pref = preferred_model == "sep", + iid_pref = preferred_model == "iid", + pi_pref = preferred_model == "pi", + rho_pref = preferred_model == "rho", + pirho_pref = preferred_model == "pirho" + ) %>% + select(-preferred_model) %>% + summarise_at(vars(contains("pref")), list("prop" = mean)) +preferred_model_data_longer <- preferred_model_data %>% pivot_longer(cols = contains("pref"), names_to = "model", names_pattern = "(sep|iid|pirho|pi|rho)", values_to = "proportion") + +model_comparison_table <- ftable(prop.table(xtabs(~ `$\\epsilon_{\\pi}$` + `$\\epsilon_{\\rho}$` + `preferred model`, data = latex_ready_data), margin = c("$\\epsilon_{\\pi}$", "$\\epsilon_{\\rho}$")), digits = 2) +``` + +```{r} +#| output: asis +print( + xtable::xtableFtable(round(model_comparison_table, digits = 2), + method = "row.compact" + )) +``` diff --git a/code/analysis/simulations/parse-latex.lua b/code/analysis/simulations/parse-latex.lua new file mode 100644 index 0000000..9594efd --- /dev/null +++ b/code/analysis/simulations/parse-latex.lua @@ -0,0 +1,32 @@ +--- parse-latex.lua – parse and replace raw LaTeX snippets +--- +--- Copyright: © 2021–2022 Albert Krewinkel +--- License: MIT – see LICENSE for details + +-- Makes sure users know if their pandoc version is too old for this +-- filter. +PANDOC_VERSION:must_be_at_least '2.9' + +-- Return an empty filter if the target format is LaTeX: the snippets will be +-- passed through unchanged. +if FORMAT:match 'latex' then + return {} +end + +-- Parse and replace raw TeX blocks, leave all other raw blocks +-- alone. +function RawBlock(raw) + if raw.format:match 'tex' then + return pandoc.read(raw.text, 'latex').blocks + end +end + +-- Parse and replace raw TeX inlines, leave other raw inline +-- elements alone. +function RawInline(raw) + if raw.format:match 'tex' then + return pandoc.utils.blocks_to_inlines( + pandoc.read(raw.text, 'latex').blocks + ) + end +end