data : Adding BICL detail computation

This commit is contained in:
Louis Lacoste 2024-07-11 23:55:59 +02:00
parent 5d130c3adf
commit b775542a6a
3 changed files with 629 additions and 330 deletions

File diff suppressed because one or more lines are too long

View file

@ -66,6 +66,35 @@ knitr::kable(vec_bicl,
)
```
```{r}
bicl_df <- do.call(
"rbind",
lapply(seq_along(list_clustering), function(idx) {
partition <- readRDS(list_clustering[[idx]])
unlisted_best_partition <- extract_best_partition(partition)
if (!is.list(unlisted_best_partition)) {
bicl_df <- get_details_bicl(unlisted_best_partition) %>%
mutate(clustering = names(list_clustering)[[idx]])
} else {
bicl_df <- do.call("rbind", lapply(
seq_len(length(unlisted_best_partition)),
function(idx) {
get_details_bicl(unlisted_best_partition[[idx]]) %>%
mutate(col_id = idx)
}
)) %>%
select(-col_id) %>%
summarize_all(sum, na.rm = TRUE) %>%
mutate(clustering = names(list_clustering)[[idx]])
}
bicl_df <- bicl_df %>% relocate(clustering)
bicl_df
})
)
knitr::kable(bicl_df, caption = "Détails des pénalités et du BIC-L", row.names = FALSE)
```
:::{.panel-tabset}
```{r write_tabs}

View file

@ -79,3 +79,64 @@ extract_clustering <- function(clustering) {
}
return(out)
}
compute_alpha_penalty <- function(model) {
N_M <- sum(model[["n"]][[1]] * model[["n"]][[2]])
if (model[["free_mixture_row"]] || model[["free_mixture_col"]]) {
return(sum(model$Calpha) * log(N_M))
} else {
return(model$Q[1] * model$Q[2] * log(N_M))
}
}
compute_pi_penalty <- function(model) {
if (model$free_mixture_row) {
Cpi <- model$Cpi[[1]]
pi1_penalty <- sum((colSums(Cpi) - 1) * log(model$n[[1]]))
} else {
# To account for the possibility of the other free_mixture we store a
# temporary support full of TRUE
Cpi <- matrix(TRUE, model$Q[1], model$M) # Cpi must be Q x M !
# If there is no free mixture on the cols
pi1_penalty <- (model$Q[1] - 1) * log(sum(model$n[[1]]))
}
return(pi1_penalty)
}
compute_rho_penalty <- function(model) {
if (model$free_mixture_col) {
Cpi <- model$Cpi[[2]]
pi2_penalty <- sum((colSums(Cpi) - 1) * log(model$n[[2]]))
} else {
# To account for the possibility of the other free_mixture we store a
# temporary support full of TRUE
Cpi <- matrix(TRUE, model$Q[2], model$M) # Cpi must be Q x M !
# If there is no free mixture on the cols
pi2_penalty <- (model$Q[2] - 1) * log(sum(model$n[[2]]))
}
return(pi2_penalty)
}
compute_S_penalty <- function(model, dim) {
if ((dim == 1 && model$free_mixture_row) || (dim == 2 && model$free_mixture_col)) {
log_p_Q <- -model$M * log(model$Q[dim]) -
sum(log(choose(
rep(model$Q[dim], model$M), colSums(model$Cpi[[dim]])
)))
return(-2 * log_p_Q)
} else {
return(0)
}
}
get_details_bicl <- function(model) {
data.frame(
vbound = model$compute_vbound(),
pen_pi = 0.5 * compute_pi_penalty(model),
pen_S_pi = 0.5 * compute_S_penalty(model, dim = 1),
pen_rho = 0.5 * compute_rho_penalty(model),
pen_S_rho = 0.5 * compute_S_penalty(model, dim = 2),
pen_alpha = 0.5 * compute_alpha_penalty(model),
package_bicl = model$compute_BICL()
) |> dplyr::mutate(computed_bicl = vbound - sum(dplyr::across(pen_pi:pen_alpha)))
}