Divers tests

This commit is contained in:
Louis Lacoste 2024-05-07 16:40:47 +02:00
parent 9e9b97bcf6
commit 993f4ef7e4
5 changed files with 5380 additions and 6 deletions

View file

@ -0,0 +1,47 @@
library(colSBM)
library(microbenchmark)
library(here)
set.seed(1234, "L'Ecuyer-CMRG")
data(dorebipartite)
str(dorebipartite)
nb_cores = parallelly::availableCores()
concurrent_models <- min(length(seq(1,4)), nb_cores%/%2)
per_model <- (nb_cores - concurrent_models) %/% length(seq(1,4))
mb1 <- microbenchmark(
"fancy computation" = {
parallel::mclapply(seq(1, 4), function(idx) {
message("Start ", idx)
estimate_colBiSBM(
netlist = dorebipartite[1:4],
colsbm_model = "iid",
global_opts = list(nb_cores = per_model, verbosity = 0L)
)
message("End ", idx)
},
mc.cores = concurrent_models
)
},
times = 5L
)
mb2 <- microbenchmark(
"Attributing all" = {
parallel::mclapply(seq(1, 4), function(idx) {
message("Start ", idx)
estimate_colBiSBM(
netlist = dorebipartite[1:4],
colsbm_model = "iid",
global_opts = list(nb_cores = parallelly::availableCores(omit = 1), verbosity = 1L)
)
message("End ", idx)
},
mc.cores = parallelly::availableCores(omit = 1)
)
},
times = 5L
)
# Bilan : plus intéressant de bourriner

View file

@ -1,9 +1,179 @@
```{r, echo = FALSE}
data <- readRDS("code/results//simulations/inference//bernoulli/bernoulli_inference_18-04-2024_09-41-45_1-972.Rds")
prob_data <- data[which((data$pirho_double_row_ARI < 1 | data$pirho_double_col_ARI < 1)&(data$pirho_mean_row_ARI == 1 & data$pirho_mean_col_ARI == 1)),]
```{r libraries, echo = FALSE}
library(here)
library(dplyr)
library(colSBM)
library(aricode)
library(patchwork)
library(ggplot2)
```
```{r , echo = FALSE}
knitr::kable(prob_data)
```{r plot-alpha, echo = FALSE}
plot_alpha <- function(alpha) {
p_alpha <- alpha[drop = FALSE] %>%
t() %>%
reshape2::melt() %>%
ggplot2::ggplot(ggplot2::aes(x = Var1, y = Var2, fill = value)) +
ggplot2::geom_tile() +
ggplot2::geom_text(ggplot2::aes(label = round(value, 2)), color = "black") +
ggplot2::scale_fill_gradient2("alpha",
low = "white",
high = "red",
limits = c(
0,
1
)
) +
ggplot2::geom_hline(yintercept = seq(nrow(alpha)) + .5) +
ggplot2::geom_vline(xintercept = seq(ncol(alpha)) + .5) +
ggplot2::scale_y_reverse() +
ggplot2::theme_bw(base_size = 15, base_rect_size = 1, base_line_size = 1) +
ggplot2::xlab("") +
ggplot2::ylab("") +
ggplot2::coord_fixed(expand = FALSE)
return(p_alpha)
}
```
```{r, echo = FALSE}
data_folder <- file.path(
here(), "code", "results",
"investigating", "selection_error", "incorrect24-04-2024_17-18-42"
)
file_list <- list.files(data_folder)
file <- file_list[[1]]
data <- readRDS(file.path(data_folder, file))
#  Extracting data to netlist and Z
netlist_memb <- data[["netlist"]]
netlist <- lapply(netlist_memb, function(full) full[["incidence_matrix"]])
nr <- nrow(netlist[[1]])
nc <- ncol(netlist[[1]])
row_blockmemberships <- lapply(netlist_memb, function(full) full[["row_blockmemberships"]])
col_blockmemberships <- lapply(netlist_memb, function(full) full[["col_blockmemberships"]])
joined_row_memberships <- unlist(row_blockmemberships)
joined_col_memberships <- unlist(col_blockmemberships)
row_taus <- lapply(row_blockmemberships, function(Z1_m) colSBM:::.one_hot(Z1_m, 4))
col_taus <- lapply(col_blockmemberships, function(Z2_m) colSBM:::.one_hot(Z2_m, 4))
```
```{r parameters, echo = FALSE}
set.seed(1234, "L'Ecuyer-CMRG")
fitted_bisbmpop_pirho <- estimate_colBiSBM(
netlist = netlist,
colsbm_model = "pirho",
nb_run = 3L,
distribution = "bernoulli",
global_opts = list(
verbosity = 3L,
plot_details = 0,
nb_cores = parallelly::availableCores(omit = 1)
)
)
```
```{r, echo = FALSE}
bad_model <- fitted_bisbmpop_pirho[["best_fit"]]$clone()
good_model <- fitted_bisbmpop_pirho[["model_list"]][[4, 4]]$clone()
```
```{r ARI, echo = FALSE}
good_Z <- good_model[["Z"]]
good_row <- lapply(good_Z, function(model) model[["row"]])
good_col <- lapply(good_Z, function(model) model[["col"]])
good_joined_row <- unlist(good_row)
good_joined_col <- unlist(good_col)
sapply(seq_len(length(good_row)), function(m) {
ARI(
row_blockmemberships[[m]],
good_row[[m]]
)
})
sapply(seq_len(length(good_row)), function(m) {
ARI(
col_blockmemberships[[m]],
good_col[[m]]
)
})
ARI(joined_row_memberships, good_joined_row)
ARI(joined_col_memberships, good_joined_col)
```
```{r, echo = FALSE}
wrap_plots(
plot_alpha(data[["alpha"]])+ labs(caption = "Vrai alpha"),
plot(good_model, type = "meso", mixture = TRUE, values = TRUE) + labs(caption = "Q = (4,4)"),
plot(bad_model, type = "meso", mixture = TRUE, values = TRUE) + labs(caption = "Q = (4,5)")
)
```
```{r poisson?, echo = FALSE}
# set.seed(1234, "L'Ecuyer-CMRG")
# alpha_poisson <- alpha * 10
# netlist_poisson <- c(generate_bipartite_collection(nr = nr, nc = nc, pi = pi1, rho = rho1, alpha = alpha_poisson, M = 1, distribution = "poisson"),
# generate_bipartite_collection(nr = nr, nc = nc, pi = pi2, rho = rho2, alpha = alpha_poisson, M = 1, distribution = "poisson"))
# fitted_bisbmpop_pirho_poisson <- estimate_colBiSBM(
# netlist = netlist_poisson,
# colsbm_model = "pirho",
# nb_run = 1,
# distribution = "poisson",
# global_opts = list(
# verbosity = 3L,
# plot_details = 0,
# nb_cores = parallelly::availableCores(omit = 1)
# )
# )
# wrap_plots(
# plot_alpha(alpha_poisson) + labs(caption = "Vrai alpha"),
# plot(fitted_bisbmpop_pirho_poisson$best_fit, type = "meso", values = TRUE, mixture = TRUE))
```
Ici on clone le modèle et on lui donne les bons paramètres pour voir s'il fait
mieux que celui trouvé avant.
```{r testing_true_params, echo = FALSE}
# Un clone
good_clone <- good_model$clone()
# Les vrais paramètres
alpha <- data[["alpha"]]
taus <- lapply(seq_along(row_taus), function(m) {
list(row = row_taus[[m]],
col = col_taus[[m]])
})
# Pi params
pi1 <- as.vector(data[["pi1"]])
pi1[1] <- pi1[1] + 1e-9
rho1 <- rep(0.25, 4)
pi2 <- rep(0.25, 4)
rho2 <- as.vector(data[["rho2"]])
rho2[3] <- rho2[3] + 1e-9
pi <- list(list(pi1, rho1), list(pi2, rho2))
good_clone[["tau"]] <- taus
good_clone[["pi"]] <- pi
good_clone[["alpha"]] <- alpha
result_BICL <- c(bad_BICL = bad_model$compute_BICL(),
good_model_BICL = good_model$compute_BICL(),
good_clone_BICL = good_clone$compute_BICL())
print(result_BICL)
```
On vient mettre à la place du 4,4 le modèle avec les vrais paramètres.
Et on espère voir la bonne information se diffuser.
```{r adjusting, echo = FALSE}
fitted_bisbmpop_pirho$model_list[[4,4]] <- good_clone
fit_1_pass <- adjust_colBiSBM(fitted_bisbmpop_pirho, Q = c(4,4), depth = 1L, nb_pass = 1L)
```

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,42 @@
library(colSBM)
library(here)
set.seed(1234, "L'Ecuyer-CMRG")
prof_folder <- file.path(here(), "code", "results", "investigating", "profiling")
if (!dir.exists(prof_folder)) {
dir.create(prof_folder)
}
prof_file <- file.path(prof_folder, "no-parallel-asis.out")
nr <- 70
nc <- 50
pi <- c(0.3, 0.2, 0.5)
rho <- c(0.25, 0.15, 0.6)
alpha <- matrix(c(
0.75, 0.4, 0.1,
0.2, 0.05, 0.05,
0.1, 0.05, 0.3
), nrow = 3)
netlist <- generate_bipartite_collection(
nr = nr, nc = nc,
pi = pi, rho = rho,
alpha = alpha,
M = 4
)
Rprof(filename = prof_file, memory.profiling = TRUE)
fit <- estimate_colBiSBM(
netlist = netlist,
colsbm_model = "pirho",
global_opts = list(verbosity = 1L, backend = "no_mc")
)
Rprof(NULL)
for (filename in list.files(prof_folder)) {
print(filename)
prof_sum <- summaryRprof(file.path(prof_folder, filename), memory = "both")
print(prof_sum[["by.total"]][1, ])
}