Removing non breaking spaces

This commit is contained in:
Louis Lacoste 2024-06-07 15:48:53 +02:00
parent a9b6ae30d4
commit b43618b378
27 changed files with 2124 additions and 234 deletions

View file

@ -1,4 +1,4 @@
#  MIA Stage 2024
# MIA Stage 2024
Ce dépôt accueille tout le travaille que je vais réaliser pendant mon stage au
MIA Paris-Saclay en 2024.

View file

@ -7,7 +7,7 @@ output:
---
```{r knitropts, echo = FALSE}
knitr::opts_chunk$set(fig.width=12L, fig.height=10L)
knitr::opts_chunk$set(fig.width = 12L, fig.height = 10L)
```
```{r imports, echo = FALSE, include=FALSE}
@ -19,7 +19,7 @@ loadNamespace(package = "patchwork")
```
```{r data, echo = FALSE}
#  Loading data
# Loading data
data_folder <- file.path(here(), "code", "results", "simulations", "NA_robustness")
files_list <- list.files(data_folder, pattern = "^NA_robustness_22-04-2024_17")
```
@ -31,13 +31,13 @@ prepare_dataframes_auc_ari <- function(data) {
sampling <- unique(data[["sampling"]])
struct <- unique(data[["struct"]])
#  Averaging over repetitions
# Averaging over repetitions
averaged_data <- data %>%
group_by(prop_NAs, model) %>%
select(-c(repetition, sampling, struct)) %>%
summarise_all(list(mean = mean, sd = sd))
#  Preparing auc_data
# Preparing auc_data
auc_data <- averaged_data %>%
select(c(prop_NAs, model) | contains("auc_")) %>%
rename_with(~ gsub("auc_", "", .x, fixed = TRUE))

View file

@ -24,7 +24,7 @@ loadNamespace("kableExtra")
```
```{r data, echo = FALSE}
#  Loading data
# Loading data
data_folder <- file.path(here(), "code", "results", "simulations", "inference", "bernoulli")
# files_list <- list.files(data_folder, pattern = "^NA_robustness_19-04-2024")
data <- readRDS(file.path(data_folder, "bernoulli_inference_06-05-2024_17-21-48_1-972.Rds"))
@ -62,7 +62,7 @@ print_data <- av_data %>%
group_by(epsilon_alpha) %>%
round(2) %>%
mutate(
#  Model comparison
# Model comparison
pirho_over_sep = pirho_over_sep_mean,
pirho_over_iid = pirho_over_iid_mean,
pirho_over_pi = pirho_over_pi_mean,
@ -77,7 +77,7 @@ print_data <- av_data %>%
pirho_Q2_over_4 = pirho_Q2_over_4_mean,
#  Grouping accuracy
# Grouping accuracy
pirho_mean_row_ARI = paste(pirho_mean_row_ARI_mean,
pirho_mean_row_ARI_sd,
sep = "\\pm"
@ -132,7 +132,7 @@ ft_1 <- flextable(print_data) |>
opts_html = list(
scroll = list(freeze_first_column = TRUE)
)
) |>
) |>
compose(j = "pirho_mean_row_ARI", value = as_paragraph(as_equation(pirho_mean_row_ARI)))
ft_1 <- compose(ft_1, j = "pirho_mean_col_ARI", value = as_paragraph(as_equation(pirho_mean_col_ARI)))
ft_1 <- compose(ft_1, j = "pirho_double_row_ARI", value = as_paragraph(as_equation(pirho_double_row_ARI)))
@ -143,7 +143,7 @@ ft_1 <- ft_1 |> mk_par(
as_equation(col_keys, width = .1, height = .2)
)
)
theme_zebra(ft_1) |>
theme_zebra(ft_1) |>
hline(i = 1, border = fp_border_default(), part = "header") |>
vline(j = 1, border = fp_border_default(), part = "all") |>
bg(j = 1, bg = "#DDDDDD", part = "all") |>

View file

@ -6,9 +6,9 @@ set.seed(1234, "L'Ecuyer-CMRG")
data(dorebipartite)
str(dorebipartite)
nb_cores = parallelly::availableCores()
concurrent_models <- min(length(seq(1,4)), nb_cores%/%2)
per_model <- (nb_cores - concurrent_models) %/% length(seq(1,4))
nb_cores <- parallelly::availableCores()
concurrent_models <- min(length(seq(1, 4)), nb_cores %/% 2)
per_model <- (nb_cores - concurrent_models) %/% length(seq(1, 4))
mb1 <- microbenchmark(
"fancy computation" = {
@ -44,4 +44,4 @@ mb2 <- microbenchmark(
times = 5L
)
# Bilan : plus intéressant de bourriner
# Bilan : plus intéressant de bourriner

View file

@ -44,7 +44,7 @@ file_list <- list.files(data_folder)
file <- file_list[[1]]
data <- readRDS(file.path(data_folder, file))
#  Extracting data to netlist and Z
# Extracting data to netlist and Z
netlist_memb <- data[["netlist"]]
netlist <- lapply(netlist_memb, function(full) full[["incidence_matrix"]])
nr <- nrow(netlist[[1]])
@ -58,8 +58,6 @@ joined_col_memberships <- unlist(col_blockmemberships)
row_taus <- lapply(row_blockmemberships, function(Z1_m) colSBM:::.one_hot(Z1_m, 4))
col_taus <- lapply(col_blockmemberships, function(Z2_m) colSBM:::.one_hot(Z2_m, 4))
```
```{r parameters, echo = FALSE}
@ -110,7 +108,7 @@ ARI(joined_col_memberships, good_joined_col)
```{r, echo = FALSE}
wrap_plots(
plot_alpha(data[["alpha"]])+ labs(caption = "Vrai alpha"),
plot_alpha(data[["alpha"]]) + labs(caption = "Vrai alpha"),
plot(good_model, type = "meso", mixture = TRUE, values = TRUE) + labs(caption = "Q = (4,4)"),
plot(bad_model, type = "meso", mixture = TRUE, values = TRUE) + labs(caption = "Q = (4,5)")
)
@ -143,18 +141,20 @@ Ici on clone le modèle et on lui donne les bons paramètres pour voir s'il fait
mieux que celui trouvé avant.
```{r testing_true_params, echo = FALSE}
# Un clone
# Un clone
good_clone <- good_model$clone()
# Les vrais paramètres
# Les vrais paramètres
alpha <- data[["alpha"]]
taus <- lapply(seq_along(row_taus), function(m) {
list(row = row_taus[[m]],
col = col_taus[[m]])
list(
row = row_taus[[m]],
col = col_taus[[m]]
)
})
# Pi params
pi1 <- as.vector(data[["pi1"]])
pi1[1] <- pi1[1] + 1e-9
pi1[1] <- pi1[1] + 1e-9
rho1 <- rep(0.25, 4)
pi2 <- rep(0.25, 4)
rho2 <- as.vector(data[["rho2"]])
@ -165,15 +165,17 @@ good_clone[["tau"]] <- taus
good_clone[["pi"]] <- pi
good_clone[["alpha"]] <- alpha
result_BICL <- c(bad_BICL = bad_model$compute_BICL(),
good_model_BICL = good_model$compute_BICL(),
good_clone_BICL = good_clone$compute_BICL())
result_BICL <- c(
bad_BICL = bad_model$compute_BICL(),
good_model_BICL = good_model$compute_BICL(),
good_clone_BICL = good_clone$compute_BICL()
)
print(result_BICL)
```
On vient mettre à la place du 4,4 le modèle avec les vrais paramètres.
Et on espère voir la bonne information se diffuser.
```{r adjusting, echo = FALSE}
fitted_bisbmpop_pirho$model_list[[4,4]] <- good_clone
fit_1_pass <- adjust_colBiSBM(fitted_bisbmpop_pirho, Q = c(4,4), depth = 1L, nb_pass = 1L)
fitted_bisbmpop_pirho$model_list[[4, 4]] <- good_clone
fit_1_pass <- adjust_colBiSBM(fitted_bisbmpop_pirho, Q = c(4, 4), depth = 1L, nb_pass = 1L)
```

View file

@ -2,7 +2,7 @@ library(here)
inv_folder <- file.path(here(), "code", "results", "investigating", "split_clust")
for(file in list.files(inv_folder)) {
for (file in list.files(inv_folder)) {
load(file.path(inv_folder, file))
}
@ -16,10 +16,9 @@ noname_X <- as.matrix(noname_X)
colSBM:::split_clust(noname_X, Z, Q = 1, is_bipartite = TRUE)
alt_X <- colSBM::generate_bipartite_collection(nr = 15, nc = 30, pi = c(1), rho = c(1), alpha = c(0.7), M = 1
)[[1]]
alt_X <- colSBM::generate_bipartite_collection(nr = 15, nc = 30, pi = c(1), rho = c(1), alpha = c(0.7), M = 1)[[1]]
colSBM:::split_clust(alt_X, Z, Q = 1, is_bipartite = TRUE)
# La solution était que X était regardé comme une table et unique marche
# différemment
# La solution était que X était regardé comme une table et unique marche
# différemment

View file

@ -0,0 +1,133 @@
### Modèle ${{model}}$
```{r}
library(here)
library(colSBM)
library(dplyr)
library(tidyr)
library(plotly)
library(ggplot2)
library(ggforce)
library(ggrepel)
library(latex2exp)
```
```{r}
list_collection <- readRDS("{{clustering}}")
unlisted_best_partition <- unlist(extract_best_bipartite_partition(list_collection))
graph_size_df <- build_graph_size_dataframe(unlisted_best_partition)
summarized_graph_size_df <- graph_size_df %>%
group_by(collection_id) %>%
select(-net_id) %>%
summarise(
M = n(),
nr_mean = mean(nr),
nr_sd = ifelse(length(nr) > 1, sd(nr), 0),
nc_mean = mean(nc),
nc_sd = ifelse(length(nc) > 1, sd(nc), 0),
Qr = first(Qr),
Qc = first(Qc)
)
```
:::{.panel-tabset}
#### Analyse des tailles de cluster
```{r}
filtered_summarized_graph_size_df <- summarized_graph_size_df
ggplot(filtered_summarized_graph_size_df) +
aes(x = Qc, y = Qr, color = collection_id, label = M) +
geom_point() + # , position = position_jitter(h = 0.15, w = 0.05)
geom_text_repel() +
xlab(TeX("$Q_c$")) +
ylab(TeX("$Q_r$")) +
coord_fixed()
plot_ly(x = filtered_summarized_graph_size_df$Qc,
y = filtered_summarized_graph_size_df$Qr,
z = filtered_summarized_graph_size_df$M,
type = "scatter3d", mode = "markers",
color = filtered_summarized_graph_size_df$collection_id) %>%
layout(scene = list(xaxis = list(title = "Q_c"),
yaxis = list(title = "Q_r")))
```
#### Analyse des tailles de réseaux
```{r}
ggplot(filtered_summarized_graph_size_df) +
aes(x = nc_mean, y = nr_mean, color = collection_id) +
geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
geom_ellipse(aes(
x0 = nc_mean, y0 = nr_mean,
a = 0.5 * nc_sd,
b = 0.5 * nr_sd,
fill = collection_id, angle = 0
), alpha = 0.1) +
geom_text_repel(aes(label = collection_id)) +
xlab(TeX("$n_c$")) +
ylab(TeX("$n_r$")) +
coord_fixed() +
ggtitle("Distribution des tailles moyennes et écart-types")
max_nr <- 200
max_nc <- 200
ggplot(filtered_summarized_graph_size_df %>% filter(nr_mean <= max_nr, nc_mean <= max_nc)) +
aes(x = nc_mean, y = nr_mean, color = collection_id) +
geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
geom_ellipse(aes(
x0 = nc_mean, y0 = nr_mean,
a = 0.5 * nc_sd,
b = 0.5 * nr_sd,
fill = collection_id, angle = 0
), alpha = 0.1) +
geom_text_repel(aes(label = collection_id)) +
xlab(TeX("$n_c$")) +
ylab(TeX("$n_r$")) +
coord_fixed() +
ggtitle(TeX(paste0("Distribution des tailles moyennes et écart-types,", "$n_r <=", max_nr, "$", " et ", "$n_c <=", max_nc, "$")))
filtered_graph_size <- graph_size_df
ggplot(filtered_graph_size) +
aes(x = collection_id, y = nr, fill = collection_id) +
geom_boxplot()
ggplot(filtered_graph_size) +
aes(x = collection_id, y = nc, fill = collection_id) +
geom_boxplot()
ggplot(filtered_graph_size) +
aes(x = collection_id, y = nr + nc, fill = collection_id) +
geom_boxplot()
ggplot(filtered_graph_size) +
aes(x = collection_id, y = nr * nc, fill = collection_id) +
geom_boxplot()
```
:::
#### Structure des collections
```{r}
#| output: asis
for (i in seq_len(length(unlisted_best_partition))) {
cat("\n##### Structure collection ", i, "\n")
current_col <- unlisted_best_partition[[i]]$clone()
current_col$net_id <- current_col$net_id %>%
stringr::str_trunc(width = 20L)
try({
p <- plot(
current_col,
type = "meso",
values = FALSE, mixture = TRUE
) +
ggtitle(paste0("Structure\ncollection ", i))
print(p)
})
}
```

View file

@ -24,7 +24,7 @@ if (length(arg) == 0L) {
}
}
#  Arguments checks
# Arguments checks
allowed_model <- c("iid", "pi", "rho", "pirho")
stopifnot(
"Unknown model, should be : iid, pi, rho or pirho" = (model %in% allowed_model),

File diff suppressed because one or more lines are too long

View file

@ -1,52 +1,46 @@
---
format: html
title: Clustering avec `colSBM` des données
execute:
echo: false
warning: false
---
# Analyse
```{r}
library(colSBM)
library(here)
library(stringr)
```
```{r}
root_app_folder <- file.path(here(), "code", "applications")
source(file.path(root_app_folder, "utils.R"))
data_folder <- file.path(here(), "code", "results", "applications", "dore")
files_vec <- get_recent_files(data_folder)
files_vec <- get_recent_files(data_folder, n = 16)
files_vec <- identify_models(files_vec)
list_collection <- readRDS(file.path(data_folder, "dore_collection_iid_24-05-24_18-07-50.Rds"))
best_partition <- extract_best_bipartite_partition(list_collection)
length(unlist(best_partition))
unlisted_bp <- unlist(best_partition)
list_clustering <- files_vec
# list_collection <- readRDS(file.path(data_folder, "dore_collection_iid_24-05-24_18-07-50.Rds"))
```
```{r}
library(stringr)
## Analyse par modèle
# string_split <- function(string,size) {
# unlist(str_extract_all(string, paste0('.{1,',size,'}')))
# }
:::{.panel-tabset}
# custom_str_wrap <- function(str, width = 30) {
# if (nchar(str) > width) {
# return(paste(string_split(str, size = width), collapse = "\n"))
# }
# return(str)
# }
```{r write_tabs}
#| warning: false
#| output: asis
iid_BICL <- sum(sapply(unlisted_bp, function(model) model$BICL))
#  Shortening net names
for (idx in seq_len(length(unlisted_bp))) {
unlisted_bp[[idx]]$net_id <- sapply(unlisted_bp[[idx]]$net_id, function(id) str_trunc(id,20))
}
```
```{r}
for (idx in seq_len(length(unlisted_bp))) {
print(plot(unlisted_bp[[idx]], type = "meso", mixture = TRUE, values = TRUE))
# Generate content for each model using knit_expand
for (clustering_idx in seq_len(length(list_clustering))) {
clustering <- list_clustering[clustering_idx]
model <- names(clustering)
expanded_content <- knitr::knit_expand(file = file.path(root_app_folder, "base_analysis.qmd"), clustering = clustering, model = model)
res <- knitr::knit_child(text = expanded_content, quiet = TRUE)
cat(res, sep = "\n")
cat("\n")
}
```
Analyser par LBM :
3,
:::

View file

@ -28,7 +28,7 @@ if (length(arg) == 0L) {
}
}
#  Arguments checks
# Arguments checks
allowed_model <- c("iid", "pi", "rho", "pirho")
stopifnot(
"Unknown model, should be : iid, pi, rho or pirho" = (model %in% allowed_model)

File diff suppressed because one or more lines are too long

View file

@ -3,8 +3,18 @@ title: Analysis of herbivores data
execute:
echo: false
freeze: auto
warning: false
---
```{r import_data}
root_app_folder <- file.path(here(), "code", "applications")
source(file.path(root_app_folder, "utils.R"))
data_folder <- file.path(here(), "code", "results", "applications", "herbivores")
files_vec <- get_recent_files(data_folder)
filepath <- files_vec[1]
list_collection <- readRDS(filepath)
```
```{r libs_and_data}
library(here)
@ -12,21 +22,14 @@ library(colSBM)
library(dplyr)
library(tidyr)
library(plotly)
library(ggplot2)
library(ggforce)
library(weird)
root_app_folder <- file.path(here(), "code", "applications")
source(file.path(root_app_folder, "utils.R"))
data_folder <- file.path(here(), "code", "results", "applications", "herbivores")
files_vec <- get_recent_files(data_folder)
# files_vec <- identify_models(files_vec)
library(ggrepel)
library(latex2exp)
```
```{r}
filepath <- files_vec[1]
list_collection <- readRDS(filepath)
```{r dataformatting}
unlisted_best_partition <- unlist(extract_best_bipartite_partition(list_collection))
graph_size_df <- build_graph_size_dataframe(unlisted_best_partition)
@ -41,39 +44,72 @@ summarized_graph_size_df <- graph_size_df %>%
nc_sd = ifelse(length(nc) > 1, sd(nc), 0),
Qr = first(Qr),
Qc = first(Qc)
)
)
```
### Analyse des tailles de cluster
```{r collection_blocks_plot_size}
#| layout-ncol: 2
#| plotly: true
filtered_summarized_graph_size_df <- summarized_graph_size_df
ggplot(filtered_summarized_graph_size_df) +
aes(x = Qc, y = Qr, color = collection_id, label = M) +
geom_point() + # , position = position_jitter(h = 0.15, w = 0.05)
geom_text_repel() +
xlab(TeX("$Q_c$")) +
ylab(TeX("$Q_r$")) +
coord_fixed()
plot_ly(x = filtered_summarized_graph_size_df$Qc,
y = filtered_summarized_graph_size_df$Qr,
z = filtered_summarized_graph_size_df$M,
type = "scatter3d", mode = "markers",
color = filtered_summarized_graph_size_df$collection_id) %>%
layout(scene = list(xaxis = list(title = "Q_c"),
yaxis = list(title = "Q_r")))
```
```{r collection_size_boxplot}
filtered_graph_size <- graph_size_df %>% filter(M > 5)
### Analyse des tailles de réseaux
```{r size_plot}
#| layout-ncol: 2
ggplot(filtered_summarized_graph_size_df) +
aes(x = nc_mean, y = nr_mean, color = collection_id) +
geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
geom_ellipse(aes(
x0 = nc_mean, y0 = nr_mean,
a = 0.5 * nc_sd,
b = 0.5 * nr_sd,
fill = collection_id, angle = 0
), alpha = 0.1) +
geom_text_repel(aes(label = collection_id)) +
xlab(TeX("$n_c$")) +
ylab(TeX("$n_r$")) +
coord_fixed() +
ggtitle("Distribution des tailles moyennes et écart-types")
max_nr <- 200
max_nc <- 200
ggplot(filtered_summarized_graph_size_df %>% filter(nr_mean <= max_nr, nc_mean <= max_nc)) +
aes(x = nc_mean, y = nr_mean, color = collection_id) +
geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
geom_ellipse(aes(
x0 = nc_mean, y0 = nr_mean,
a = 0.5 * nc_sd,
b = 0.5 * nr_sd,
fill = collection_id, angle = 0
), alpha = 0.1) +
geom_text_repel(aes(label = collection_id)) +
xlab(TeX("$n_c$")) +
ylab(TeX("$n_r$")) +
coord_fixed() +
ggtitle(TeX(paste0("Distribution des tailles moyennes et écart-types,", "$n_r <=", max_nr, "$", " et ", "$n_c <=", max_nc, "$")))
filtered_graph_size <- graph_size_df %>% filter(M > 1)
ggplot(filtered_graph_size) +
aes(x = collection_id, y = nr, fill = collection_id) +
geom_boxplot()
ggplot(filtered_graph_size) +
aes(x = collection_id, y = nc, fill = collection_id) +
geom_boxplot()
# ggplot(filtered_graph_size) +
# aes(x = nc, y = nr, fill = collection_id) +
gg_bagplot(filtered_graph_size, x = filtered_graph_size[["nc"]], y = filtered_graph_size[["nr"]])
```
```{r collection_blocks_plot}
filtered_summarized_graph_size_df <- summarized_graph_size_df %>% filter(M > 5)
ggplot(filtered_summarized_graph_size_df) +
aes(x = Qc, y = Qr, color = collection_id) +
geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
geom_circle(aes(x0 = Qc, y0 = Qr, r = 0.025 * M, fill = collection_id), alpha = 0.1) +
coord_fixed()
```
```{r collection_size_mean}
filtered_summarized_graph_size_df <- summarized_graph_size_df %>% filter(M > 5)
ggplot(filtered_summarized_graph_size_df) +
aes(x = nc_mean, y = nr_mean, color = collection_id) +
geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
geom_ellipse(aes(x0 = nc_mean, y0 = nr_mean, a = 0.5 * nc_sd, b = 0.5 * nr_sd, fill = collection_id, angle = 0), alpha = 0.1) +
coord_fixed()
```

View file

@ -8,8 +8,12 @@
#'
#' @return A vector of size `n` with the file path.
get_recent_files <- function(data_folder, n = 4) {
files_info <- file.info(file.path(data_folder, list.files(data_folder)))
files_info[["filepath"]] <- file.path(data_folder, list.files(data_folder))
files_info <- file.info(file.path(data_folder, list.files(data_folder,
include.dirs = FALSE, pattern = "*dore_collection_[a-z]*_seed"
)))
files_info[["filepath"]] <- file.path(data_folder, list.files(data_folder,
include.dirs = FALSE, pattern = "*dore_collection_[a-z]*_seed"
))
files_info <- sort_by(files_info, files_info[["ctime"]], decreasing = TRUE)
if (n > nrow(files_info)) {
warning(
@ -23,17 +27,10 @@ get_recent_files <- function(data_folder, n = 4) {
#' Identify models
identify_models <- function(files_vec) {
iid_id <- grep(pattern = "iid", files_vec, fixed = TRUE)
pi_id <- grep(pattern = "pi_", files_vec, fixed = TRUE)
rho_id <- grep(pattern = "_rho", files_vec, fixed = TRUE)
pirho_id <- grep(pattern = "pirho", files_vec, fixed = TRUE)
file_order <- c(iid_id, pi_id, rho_id, pirho_id)
stopifnot(
"There are multiple files matching models !" =
length(file_order) <= 4L
names(files_vec) <- stringr::str_extract(
string = files_vec,
pattern = "(iid|pirho|pi|rho)"
)
files_vec <- files_vec[file_order]
names(files_vec) <- c("iid", "pi", "rho", "pirho")
return(files_vec)
}
@ -50,4 +47,4 @@ build_graph_size_dataframe <- function(collection_list) {
Qc = collection[["Q"]][[2]]
)
}))
}
}

View file

@ -37,7 +37,7 @@ if (length(arg) == 0L) {
}
}
#  Arguments checks
# Arguments checks
allowed_model <- c("iid", "pi", "rho", "pirho")
stopifnot(
"Unknown model, should be : iid, pi, rho or pirho" = (model %in% allowed_model),

View file

@ -10,7 +10,7 @@
#$ -o logs/$JOB_NAME.$TASK_ID
#$ -e logs/$JOB_NAME.$TASK_ID
# Creating log directory if it doesn't exists
# Creating log directory if it doesn't exists
BASE_DIR="/home/$USER/work/mia-stage-2024"
LOG_DIR=$(echo "$BASE_DIR/logs")
@ -18,12 +18,12 @@ if [ ! -d "$LOG_DIR" ]; then
mkdir -p $LOG_DIR
fi
# Constant data
# Constant data
MODELARRAY=("iid" "pi" "rho" "pirho")
MODEL=${MODELARRAY[$(($((SGE_TASK_ID - 1)) % 4))]}
# Finding directory
# Finding directory
APPLICATIONS_DIR=$(echo "$BASE_DIR/code/applications")
echo $APPLICATIONS_DIR

View file

@ -10,7 +10,7 @@
#$ -o logs/$JOB_NAME.$TASK_ID
#$ -e logs/$JOB_NAME.$TASK_ID
# Creating log directory if it doesn't exists
# Creating log directory if it doesn't exists
BASE_DIR="/home/$USER/work/mia-stage-2024"
LOG_DIR=$(echo "$BASE_DIR/logs")
@ -18,12 +18,12 @@ if [ ! -d "$LOG_DIR" ]; then
mkdir -p $LOG_DIR
fi
# Constant data
# Constant data
MODELARRAY=("iid" "pi" "rho" "pirho")
MODEL=${MODELARRAY[$(($((SGE_TASK_ID - 1)) % 4))]}
# Finding directory
# Finding directory
APPLICATIONS_DIR=$(echo "$BASE_DIR/code/applications")
echo $APPLICATIONS_DIR

View file

@ -10,9 +10,9 @@
#$ -o logs/$JOB_NAME.$TASK_ID
#$ -e logs/$JOB_NAME.$TASK_ID
# Creating log directory if it doesn't exists
# Creating log directory if it doesn't exists
# Constant data
# Constant data
STRUCTA=("nested" "modular")
SAMPLINGA=("uniform" "row" "col" "rowcol")
@ -25,11 +25,11 @@ if [ ! -d "$LOG_DIR" ]; then
mkdir -p $LOG_DIR
fi
# Finding simulations directory
# Finding simulations directory
SIMULATIONS_DIR=$(echo "$BASE_DIR/code/simulations")
echo $SIMULATIONS_DIR
# Parsing sge array id
# Parsing sge array id
Rscript "${SIMULATIONS_DIR}/simulations_NA_robustness.R" --struct $STRUCT

View file

@ -28,7 +28,7 @@ if (length(arg) == 0L) {
}
}
#  Arguments checks
# Arguments checks
allowed_struct <- c("modular", "nested")
stopifnot(
@ -70,49 +70,61 @@ max_repetition <- 10L
# Collections
collections <- list(
"iid" = c(generate_bipartite_collection(nr1, nc1,
pir, pic,
alpha, 1,
model = "iid",
return_memberships = TRUE),
"iid" = c(
generate_bipartite_collection(nr1, nc1,
pir, pic,
alpha, 1,
model = "iid",
return_memberships = TRUE
),
generate_bipartite_collection(nr2, nc2,
pir, pic,
alpha, M-1,
model = "iid",
return_memberships = TRUE)
pir, pic,
alpha, M - 1,
model = "iid",
return_memberships = TRUE
)
),
"pi" = c(generate_bipartite_collection(nr1, nc1,
pir, pic,
alpha, 1,
model = "pi",
return_memberships = TRUE),
"pi" = c(
generate_bipartite_collection(nr1, nc1,
pir, pic,
alpha, 1,
model = "pi",
return_memberships = TRUE
),
generate_bipartite_collection(nr2, nc2,
pir, pic,
alpha, M-1,
model = "pi",
return_memberships = TRUE)
pir, pic,
alpha, M - 1,
model = "pi",
return_memberships = TRUE
)
),
"rho" = c(generate_bipartite_collection(nr1, nc1,
pir, pic,
alpha, 1,
model = "rho",
return_memberships = TRUE),
"rho" = c(
generate_bipartite_collection(nr1, nc1,
pir, pic,
alpha, 1,
model = "rho",
return_memberships = TRUE
),
generate_bipartite_collection(nr2, nc2,
pir, pic,
alpha, M-1,
model = "rho",
return_memberships = TRUE)
pir, pic,
alpha, M - 1,
model = "rho",
return_memberships = TRUE
)
),
"pirho" = c(generate_bipartite_collection(nr1, nc1,
pir, pic,
alpha, 1,
model = "pirho",
return_memberships = TRUE),
"pirho" = c(
generate_bipartite_collection(nr1, nc1,
pir, pic,
alpha, 1,
model = "pirho",
return_memberships = TRUE
),
generate_bipartite_collection(nr2, nc2,
pir, pic,
alpha, M-1,
model = "pirho",
return_memberships = TRUE)
pir, pic,
alpha, M - 1,
model = "pirho",
return_memberships = TRUE
)
)
)
@ -124,7 +136,7 @@ conditions <- expand.grid(
)
#  Data params
# Data params
main_dir <- file.path("code", "results", "simulations", "NA_robustness")
if (!dir.exists(main_dir)) {
@ -142,7 +154,7 @@ if (!dir.exists(temp_dir)) {
}
file_save <- file.path(main_dir, paste0(
"NA_robustness_", start_time, "_", sampling,
"NA_robustness_", start_time, "_", sampling,
"_", struct, "_1-", nrow(conditions), ".Rds"
))
@ -179,7 +191,7 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {
NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
#  Computes the index as a single number, R stores matrices by column
# Computes the index as a single number, R stores matrices by column
(NAs_selected_index_exp[["col"]] - 1) *
nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
},
@ -190,7 +202,7 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {
NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
#  Computes the index as a single number, R stores matrices by column
# Computes the index as a single number, R stores matrices by column
(NAs_selected_index_exp[["col"]] - 1) *
nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
},
@ -201,7 +213,7 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {
col_nodes_selected <- which(Z[[1]][[2]] == col_cluster_selected)
NAs_selected_index_exp <- expand.grid(row = row_nodes_selected, col = col_nodes_selected)
#  Computes the index as a single number, R stores matrices by column
# Computes the index as a single number, R stores matrices by column
(NAs_selected_index_exp[["col"]] - 1) *
nrow(bipartite_collection_incidence[[1]]) + NAs_selected_index_exp[["row"]]
}
@ -269,14 +281,13 @@ result_list <- parallel::mclapply(seq_len(nrow(conditions)), function(current) {
mybisbmpop[["best_fit"]][["Z"]][[1]][[2]]
),
elapsed_secs = difftime(stop_time, start_time, units = "sec"),
sampling = sampling,
struct = struct
)
message("Finished step ", current, "/", nrow(conditions), "\n")
#  Saving temp
# Saving temp
temp_file_save <- file.path(temp_dir, paste0(
"conditions_", current, "_on_",
nrow(conditions), ".Rds"
@ -296,4 +307,4 @@ saveRDS(
result_dataframe,
file = file_save
)
message("Finished simulations.")
message("Finished simulations.")

View file

@ -72,7 +72,7 @@ choosed_conditions <- seq.int(from = arg[1], to = arg[2])
conditions <- conditions[choosed_conditions, ]
#  Data params
# Data params
main_dir <- file.path("code", "results", "simulations", "inference", "bernoulli")
if (!dir.exists(main_dir)) {
@ -205,7 +205,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {
stop_time_condition <- Sys.time()
##  Preparing date for export
## Preparing date for export
# BICLs
sep_BICL <- sum(sep_BiSBM$BICL)
iid_BICL <- fitted_bisbmpop_iid$best_fit$BICL
@ -318,7 +318,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {
)
message("Finished step ", s, "/", nrow(conditions))
#  Saving temp
# Saving temp
temp_file_save <- file.path(temp_dir, paste0(
"conditions_", s, "_on_",
nrow(conditions), ".Rds"
@ -326,7 +326,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {
saveRDS(object = data_frame_output, file = temp_file_save)
#  Saving inhabitual data
# Saving inhabitual data
if (all(unlist(pirho_mean_ARIs) == 1L) & any(unlist(pirho_double_ARIs) < 1L)) {
warning("Incorrect result encountered, saving.")
incorrect_filepath <- file.path(temp_dir, paste0(
@ -358,4 +358,4 @@ full_data_frame <- do.call(rbind, results)
saveRDS(full_data_frame,
file = file_save
)
message("Finished simulations.")
message("Finished simulations.")

View file

@ -72,7 +72,7 @@ choosed_conditions <- seq.int(from = arg[1], to = arg[2])
conditions <- conditions[choosed_conditions, ]
#  Data params
# Data params
main_dir <- file.path("code", "results", "simulations", "inference", "poisson")
if (!dir.exists(main_dir)) {
@ -204,7 +204,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {
stop_time_condition <- Sys.time()
##  Preparing date for export
## Preparing date for export
# BICLs
sep_BICL <- sum(sep_BiSBM$BICL)
iid_BICL <- fitted_bisbmpop_iid$best_fit$BICL
@ -317,7 +317,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {
)
message("Finished step ", s, "/", nrow(conditions))
#  Saving temp
# Saving temp
temp_file_save <- file.path(temp_dir, paste0(
"conditions_", s, "_on_",
nrow(conditions), ".Rds"
@ -325,7 +325,7 @@ results <- parallel::mclapply(conditions_rows, function(s) {
saveRDS(object = data_frame_output, file = temp_file_save)
#  Saving inhabitual data
# Saving inhabitual data
if (all(unlist(pirho_mean_ARIs) == 1L) & any(unlist(pirho_double_ARIs) < 1L)) {
warning("Incorrect result encountered, saving.")
incorrect_filepath <- file.path(temp_dir, paste0(
@ -357,4 +357,4 @@ full_data_frame <- do.call(rbind, results)
saveRDS(full_data_frame,
file = file_save
)
message("Finished simulations.")
message("Finished simulations.")

View file

@ -72,7 +72,7 @@ if (arg[2] > nrow(conditions) | arg[2] < 1) {
choosed_conditions <- seq.int(from = arg[1], to = arg[2])
conditions <- conditions[choosed_conditions, ]
#  Data params
# Data params
main_dir <- file.path("code", "results", "simulations", "model_selection")
if (!dir.exists(main_dir)) {
@ -246,7 +246,7 @@ with_progress({
)
message("Finished step ", s, "/", nrow(conditions))
#  Saving temp
# Saving temp
temp_file_save <- file.path(temp_dir, paste0(
"conditions_", s, "_on_",
nrow(conditions), ".Rds"
@ -264,4 +264,4 @@ full_data_frame <- do.call(rbind, results)
saveRDS(full_data_frame,
file = file_save
)
message("Finished simulations.")
message("Finished simulations.")