data : Adding BICL detail computation

2024-07-11 23:55:59 +02:00 · 2024-07-11 23:55:59 +02:00 · b775542a6a
commit b775542a6a
parent 5d130c3adf
3 changed files with 629 additions and 330 deletions
--- a/code/applications/dore/02_dore_analysis.html
+++ b/code/applications/dore/02_dore_analysis.html
--- a/code/applications/dore/02_dore_analysis.qmd
+++ b/code/applications/dore/02_dore_analysis.qmd
@ -66,6 +66,35 @@ knitr::kable(vec_bicl,
 )
 ```

+```{r}
+bicl_df <- do.call(
+    "rbind",
+    lapply(seq_along(list_clustering), function(idx) {
+        partition <- readRDS(list_clustering[[idx]])
+        unlisted_best_partition <- extract_best_partition(partition)
+        if (!is.list(unlisted_best_partition)) {
+            bicl_df <- get_details_bicl(unlisted_best_partition) %>%
+                mutate(clustering = names(list_clustering)[[idx]])
+        } else {
+            bicl_df <- do.call("rbind", lapply(
+                seq_len(length(unlisted_best_partition)),
+                function(idx) {
+                    get_details_bicl(unlisted_best_partition[[idx]]) %>%
+                        mutate(col_id = idx)
+                }
+            )) %>%
+                select(-col_id) %>%
+                summarize_all(sum, na.rm = TRUE) %>%
+                mutate(clustering = names(list_clustering)[[idx]])
+        }
+        bicl_df <- bicl_df %>% relocate(clustering)
+        bicl_df
+    })
+)
+knitr::kable(bicl_df, caption = "Détails des pénalités et du BIC-L", row.names = FALSE)
+
+```
+
 :::{.panel-tabset}

 ```{r write_tabs}
--- a/code/applications/utils.R
+++ b/code/applications/utils.R
@ -79,3 +79,64 @@ extract_clustering <- function(clustering) {
    }
    return(out)
 }
+
+compute_alpha_penalty <- function(model) {
+    N_M <- sum(model[["n"]][[1]] * model[["n"]][[2]])
+    if (model[["free_mixture_row"]] || model[["free_mixture_col"]]) {
+        return(sum(model$Calpha) * log(N_M))
+    } else {
+        return(model$Q[1] * model$Q[2] * log(N_M))
+    }
+}
+
+compute_pi_penalty <- function(model) {
+    if (model$free_mixture_row) {
+        Cpi <- model$Cpi[[1]]
+        pi1_penalty <- sum((colSums(Cpi) - 1) * log(model$n[[1]]))
+    } else {
+        # To account for the possibility of the other free_mixture we store a
+        # temporary support full of TRUE
+        Cpi <- matrix(TRUE, model$Q[1], model$M) # Cpi must be Q x M !
+        # If there is no free mixture on the cols
+        pi1_penalty <- (model$Q[1] - 1) * log(sum(model$n[[1]]))
+    }
+    return(pi1_penalty)
+}
+
+compute_rho_penalty <- function(model) {
+    if (model$free_mixture_col) {
+        Cpi <- model$Cpi[[2]]
+        pi2_penalty <- sum((colSums(Cpi) - 1) * log(model$n[[2]]))
+    } else {
+        # To account for the possibility of the other free_mixture we store a
+        # temporary support full of TRUE
+        Cpi <- matrix(TRUE, model$Q[2], model$M) # Cpi must be Q x M !
+        # If there is no free mixture on the cols
+        pi2_penalty <- (model$Q[2] - 1) * log(sum(model$n[[2]]))
+    }
+    return(pi2_penalty)
+}
+
+compute_S_penalty <- function(model, dim) {
+    if ((dim == 1 && model$free_mixture_row) || (dim == 2 && model$free_mixture_col)) {
+        log_p_Q <- -model$M * log(model$Q[dim]) -
+            sum(log(choose(
+                rep(model$Q[dim], model$M), colSums(model$Cpi[[dim]])
+            )))
+        return(-2 * log_p_Q)
+    } else {
+        return(0)
+    }
+}
+
+get_details_bicl <- function(model) {
+    data.frame(
+        vbound = model$compute_vbound(),
+        pen_pi = 0.5 * compute_pi_penalty(model),
+        pen_S_pi = 0.5 * compute_S_penalty(model, dim = 1),
+        pen_rho = 0.5 * compute_rho_penalty(model),
+        pen_S_rho = 0.5 * compute_S_penalty(model, dim = 2),
+        pen_alpha = 0.5 * compute_alpha_penalty(model),
+        package_bicl = model$compute_BICL()
+    ) |> dplyr::mutate(computed_bicl = vbound - sum(dplyr::across(pen_pi:pen_alpha)))
+}