From ae8560aebc57517e1a062e5a1223272ee1f48b2f Mon Sep 17 00:00:00 2001
From: Louis Lacoste <louis.lacoste@hotmail.fr>
Date: Wed, 26 Jun 2024 17:16:06 +0200
Subject: [PATCH] analysis : model_selection table and time

---
 code/analysis/simulations/model_selection.qmd | 101 ++++++++++++++++++
 code/analysis/simulations/parse-latex.lua     |  32 ++++++
 2 files changed, 133 insertions(+)
 create mode 100644 code/analysis/simulations/model_selection.qmd
 create mode 100644 code/analysis/simulations/parse-latex.lua

diff --git a/code/analysis/simulations/model_selection.qmd b/code/analysis/simulations/model_selection.qmd
new file mode 100644
index 0000000..7d37638
--- /dev/null
+++ b/code/analysis/simulations/model_selection.qmd
@@ -0,0 +1,101 @@
+---
+title: Model selection analysis
+author: Louis Lacoste
+filters:
+  - parse-latex.lua
+header-includes:
+  - \usepackage{booktabs}
+  - \usepackage{longtable}
+execute: 
+  warning: false
+  echo: false
+---
+```{r setup}
+options(xtable.comment=FALSE) 
+```
+```{r libraries}
+library(here)
+library(dplyr)
+library(tidyr)
+library(ggplot2)
+```
+
+```{r import data}
+data_folder <- here("code", "results", "simulations", "model_selection")
+file_list <- file.info(file.path(data_folder, list.files(data_folder)))
+
+# Tri par date de création
+file_list <- file_list %>% arrange(ctime)
+file_list[["filepath"]] <- rownames(file_list)
+
+data_file <- file_list[["filepath"]][1]
+
+data <- readRDS(data_file)
+
+data[["preferred_model"]] <- factor(data[["preferred_model"]], levels = c("sep", "iid", "pi", "rho", "pirho"))
+
+latex_ready_data <- data %>% select(epsilon_pi, epsilon_rho, preferred_model)
+names(latex_ready_data) <- c("$\\epsilon_{\\pi}$", "$\\epsilon_{\\rho}$", "preferred model")
+```
+
+```{r temps_calcul}
+time_data <- data %>%
+    group_by(epsilon_pi, epsilon_rho) %>%
+    select(-(pi2.1:preferred_model)) %>%
+    mutate(elapsed_secs = as.numeric(elapsed_secs))
+
+ggplot(time_data) +
+    aes(
+        x = as.factor(epsilon_rho),
+        y = elapsed_secs, color = as.factor(epsilon_pi)
+    ) +
+    geom_boxplot()
+
+ggplot(time_data) +
+    aes(
+        x = as.factor(epsilon_pi),
+        y = elapsed_secs, color = as.factor(epsilon_rho)
+    ) +
+    geom_boxplot()
+
+```
+
+Ainsi les temps de calculs ne semblent pas augmenter de beaucoup avec des 
+modèles aux proportions plus éloignées.
+
+```{r}
+bloc_recov_data <- data %>%
+    group_by(epsilon_pi, epsilon_rho) %>%
+    select(
+        -(pi2.1:rho2.3), -repetition, -elapsed_secs,
+        -contains("BICL"), -preferred_model
+    ) %>%
+    mutate_at(vars(contains("Q")), function(Q) as.numeric(Q == 3)) %>%
+    summarise_at(vars(contains("Q")), mean)
+preferred_model_data <- data %>%
+    group_by(epsilon_pi, epsilon_rho) %>%
+    select(
+        -(pi2.1:rho2.3), -repetition, -elapsed_secs,
+        -contains("BICL"), -contains("Q")
+    ) %>%
+    mutate(
+        sep_pref = preferred_model == "sep",
+        iid_pref = preferred_model == "iid",
+        pi_pref = preferred_model == "pi",
+        rho_pref = preferred_model == "rho",
+        pirho_pref = preferred_model == "pirho"
+    ) %>%
+    select(-preferred_model) %>%
+    summarise_at(vars(contains("pref")), list("prop" = mean))
+preferred_model_data_longer <- preferred_model_data %>% pivot_longer(cols = contains("pref"), names_to = "model", names_pattern = "(sep|iid|pirho|pi|rho)", values_to = "proportion")
+
+model_comparison_table <- ftable(prop.table(xtabs(~ `$\\epsilon_{\\pi}$` + `$\\epsilon_{\\rho}$` + `preferred model`, data = latex_ready_data), margin = c("$\\epsilon_{\\pi}$", "$\\epsilon_{\\rho}$")), digits = 2)
+```
+
+```{r}
+#| output: asis
+print(
+    xtable::xtableFtable(round(model_comparison_table, digits = 2),
+        method = "row.compact"
+    ))
+```
diff --git a/code/analysis/simulations/parse-latex.lua b/code/analysis/simulations/parse-latex.lua
new file mode 100644
index 0000000..9594efd
--- /dev/null
+++ b/code/analysis/simulations/parse-latex.lua
@@ -0,0 +1,32 @@
+--- parse-latex.lua – parse and replace raw LaTeX snippets
+---
+--- Copyright: © 2021–2022 Albert Krewinkel
+--- License: MIT – see LICENSE for details
+
+-- Makes sure users know if their pandoc version is too old for this
+-- filter.
+PANDOC_VERSION:must_be_at_least '2.9'
+
+-- Return an empty filter if the target format is LaTeX: the snippets will be
+-- passed through unchanged.
+if FORMAT:match 'latex' then
+    return {}
+end
+
+-- Parse and replace raw TeX blocks, leave all other raw blocks
+-- alone.
+function RawBlock(raw)
+    if raw.format:match 'tex' then
+        return pandoc.read(raw.text, 'latex').blocks
+    end
+end
+
+-- Parse and replace raw TeX inlines, leave other raw inline
+-- elements alone.
+function RawInline(raw)
+    if raw.format:match 'tex' then
+        return pandoc.utils.blocks_to_inlines(
+            pandoc.read(raw.text, 'latex').blocks
+        )
+    end
+end