All analysis updates
This commit is contained in:
parent
af625836b6
commit
395a7beaed
22 changed files with 2304 additions and 548 deletions
|
|
@ -18,6 +18,12 @@ library(latex2exp)
|
|||
list_collection <- readRDS("{{clustering}}")
|
||||
unlisted_best_partition <- unlist(extract_best_bipartite_partition(list_collection))
|
||||
|
||||
BICL <- sum(sapply(unlisted_best_partition, function(col) col$BICL))
|
||||
```
|
||||
|
||||
Le clustering donne le critère suivant : $BICL = `r BICL`$
|
||||
|
||||
```{r}
|
||||
graph_size_df <- build_graph_size_dataframe(unlisted_best_partition)
|
||||
summarized_graph_size_df <- graph_size_df %>%
|
||||
group_by(collection_id) %>%
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -1,6 +1,6 @@
|
|||
---
|
||||
format: html
|
||||
title: Clustering avec `colSBM` des données
|
||||
title: Clustering avec `colSBM` des données Doré et al.
|
||||
execute:
|
||||
echo: false
|
||||
warning: false
|
||||
|
|
@ -12,13 +12,20 @@ execute:
|
|||
library(colSBM)
|
||||
library(here)
|
||||
library(stringr)
|
||||
library(tidyr)
|
||||
library(dplyr)
|
||||
library(aricode)
|
||||
library(reshape2)
|
||||
|
||||
library(ggplot2)
|
||||
library(ggalluvial)
|
||||
```
|
||||
|
||||
```{r}
|
||||
root_app_folder <- file.path(here(), "code", "applications")
|
||||
source(file.path(root_app_folder, "utils.R"))
|
||||
data_folder <- file.path(here(), "code", "results", "applications", "dore")
|
||||
files_vec <- get_recent_files(data_folder, n = 16)
|
||||
files_vec <- get_recent_files(data_folder, n = 16L)
|
||||
files_vec <- identify_models(files_vec)
|
||||
list_clustering <- files_vec
|
||||
# list_collection <- readRDS(file.path(data_folder, "dore_collection_iid_24-05-24_18-07-50.Rds"))
|
||||
|
|
@ -26,6 +33,22 @@ list_clustering <- files_vec
|
|||
|
||||
## Analyse par modèle
|
||||
|
||||
Les clustering donne le critère suivant :
|
||||
```{r}
|
||||
vec_bicl <- sapply(list_clustering, function(clustering) {
|
||||
list_collection <- readRDS(clustering)
|
||||
unlisted_best_partition <- unlist(
|
||||
extract_best_bipartite_partition(list_collection)
|
||||
)
|
||||
BICL <- sum(sapply(unlisted_best_partition, function(col) col$BICL))
|
||||
BICL
|
||||
})
|
||||
knitr::kable(vec_bicl,
|
||||
caption = "BICL par modèle", col.names = "$BICL$",
|
||||
row.names = TRUE
|
||||
)
|
||||
```
|
||||
|
||||
:::{.panel-tabset}
|
||||
|
||||
```{r write_tabs}
|
||||
|
|
@ -43,4 +66,97 @@ for (clustering_idx in seq_len(length(list_clustering))) {
|
|||
}
|
||||
```
|
||||
|
||||
:::
|
||||
:::
|
||||
|
||||
## Comparaison des modèles
|
||||
|
||||
### Table des clustering
|
||||
|
||||
```{r}
|
||||
clustering_df <- sapply(seq_len(length(list_clustering)), function(idx) {
|
||||
extract_clustering(readRDS(list_clustering[[idx]]))
|
||||
})
|
||||
|
||||
clust_to_compare_order <- crossing(
|
||||
f = seq_len(ncol(clustering_df)),
|
||||
s = seq_len(ncol(clustering_df))
|
||||
) %>%
|
||||
filter(!(f == s) & f < s)
|
||||
|
||||
clustering_partitions_df <- do.call("rbind", lapply(seq_len(length(list_clustering)), function(idx) {
|
||||
clust_vec <- extract_clustering(readRDS(list_clustering[[idx]]))
|
||||
net_id <- names(clust_vec)
|
||||
clust_vec <- unname(clust_vec)
|
||||
clust_idx <- idx
|
||||
data.frame(net_id = net_id, cluster = clust_vec, clustering_id = clust_idx)
|
||||
}))
|
||||
|
||||
clustering_partitions_df <- clustering_partitions_df %>%
|
||||
# mutate(net_id = str_trunc(net_id, width = 20L)) %>%
|
||||
mutate_all(as.factor)
|
||||
```
|
||||
|
||||
```{r ARI}
|
||||
ari_dist_matrix <- outer(
|
||||
X = seq_len(ncol(clustering_df)),
|
||||
Y = seq_len(ncol(clustering_df)),
|
||||
Vectorize(function(x, y) {
|
||||
f_clust <- clustering_df[, x]
|
||||
s_clust <- clustering_df[, y]
|
||||
ARI(f_clust, s_clust)
|
||||
})
|
||||
)
|
||||
ggplot(ari_dist_matrix %>% melt()) +
|
||||
aes(x = Var1, y = Var2) +
|
||||
geom_raster(aes(fill = value)) +
|
||||
scale_fill_gradient(low = "white", high = "red", limits = c(0, 1)) +
|
||||
scale_x_reverse() +
|
||||
geom_text(aes(label = round(value, digits = 4))) +
|
||||
labs(
|
||||
x = "id clusterings",
|
||||
y = "id clusterings", title = "ARI entre les clusterings"
|
||||
) +
|
||||
theme_bw()
|
||||
```
|
||||
|
||||
```{r table cooccurrence}
|
||||
list_tables_coocc <- lapply(seq_len(nrow(clust_to_compare_order)), function(row_id) {
|
||||
f <- clust_to_compare_order[[row_id, 1]]
|
||||
s <- clust_to_compare_order[[row_id, 2]]
|
||||
table(clustering_df[, f], clustering_df[, s])
|
||||
})
|
||||
```
|
||||
|
||||
```{r function plot tables}
|
||||
plot_table <- function(data, f = NULL, s = NULL) {
|
||||
melted_data <- data %>%
|
||||
melt() %>%
|
||||
mutate(
|
||||
Var1 = as.factor(Var1),
|
||||
Var2 = as.factor(Var2)
|
||||
)
|
||||
|
||||
ggplot(melted_data) +
|
||||
aes(y = Var1, x = Var2) +
|
||||
geom_raster(aes(fill = value)) +
|
||||
scale_fill_gradient(low = "white", high = "red", limits = c(0, NA)) +
|
||||
scale_x_discrete() +
|
||||
geom_text(data = subset(melted_data, value != 0), aes(label = value)) +
|
||||
labs(
|
||||
x = paste0("numéros clusters pour clustering : ", s),
|
||||
y = paste0("numéros clusters pour clustering : ", f), title = paste0("Co-occurence entre les clusterings ", f, " et ", s)
|
||||
) +
|
||||
theme_bw()
|
||||
}
|
||||
```
|
||||
|
||||
```{r tables coocc plot}
|
||||
#| output: asis
|
||||
|
||||
for (id in seq_len(length(list_tables_coocc))) {
|
||||
print(plot_table(list_tables_coocc,
|
||||
f = clust_to_compare_order[[id, 1]],
|
||||
s = clust_to_compare_order[[id, 2]]
|
||||
))
|
||||
}
|
||||
```
|
||||
920
code/applications/dore_no_small/02_dore_no_small_analysis.html
Normal file
920
code/applications/dore_no_small/02_dore_no_small_analysis.html
Normal file
File diff suppressed because one or more lines are too long
149
code/applications/dore_no_small/02_dore_no_small_analysis.qmd
Normal file
149
code/applications/dore_no_small/02_dore_no_small_analysis.qmd
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
---
|
||||
format: html
|
||||
title: Clustering avec `colSBM` des données Doré et al. seulement sur les larges réseaux
|
||||
execute:
|
||||
echo: false
|
||||
warning: false
|
||||
---
|
||||
|
||||
# Analyse
|
||||
|
||||
```{r}
|
||||
library(colSBM)
|
||||
library(here)
|
||||
library(dplyr)
|
||||
library(stringr)
|
||||
```
|
||||
|
||||
```{r}
|
||||
root_app_folder <- file.path(here(), "code", "applications")
|
||||
source(file.path(root_app_folder, "utils.R"))
|
||||
data_folder <- file.path(here(), "code", "results", "applications", "dore_no_small")
|
||||
|
||||
# Full data
|
||||
list_full_files <- file.path(
|
||||
data_folder,
|
||||
list.files(data_folder,
|
||||
pattern = "full"
|
||||
)
|
||||
)
|
||||
names(list_full_files) <- stringr::str_extract(
|
||||
string = list_full_files,
|
||||
pattern = "(iid|pirho|pi|rho)"
|
||||
)
|
||||
|
||||
lapply(seq_len(length(list_full_files)), function(idx) {
|
||||
min_size <- readRDS(list_full_files[[idx]])[[1]]
|
||||
clustering <- readRDS(list_full_files[[idx]])[[2]]
|
||||
|
||||
min_filepath <- gsub("full_", "min_size_", list_full_files[[idx]])
|
||||
clustering_filepath <- gsub("full_", "clustering_", list_full_files[[idx]])
|
||||
|
||||
saveRDS(min_size, min_filepath)
|
||||
saveRDS(clustering, clustering_filepath)
|
||||
})
|
||||
|
||||
```
|
||||
|
||||
```{r}
|
||||
#| output: false
|
||||
files_vec <- get_recent_files(data_folder, pattern = "clustering")
|
||||
min_vec <- get_recent_files(data_folder, pattern = "min")
|
||||
files_vec <- identify_models(files_vec)
|
||||
min_vec <- identify_models(min_vec)
|
||||
|
||||
# Importing data
|
||||
base_data_folder <- file.path(here(), "code", "data", "dore")
|
||||
|
||||
interaction_data <- read.table(file = file.path(base_data_folder, "interaction-data.txt"), sep = "\t", header = TRUE)
|
||||
|
||||
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
|
||||
names_aggreg_networks <- sapply(
|
||||
seq_ids_network_aggreg,
|
||||
function(id) {
|
||||
paste0(
|
||||
unique(interaction_data[which(interaction_data$id_network_aggreg == id), ]$web),
|
||||
collapse = "+"
|
||||
)
|
||||
}
|
||||
)
|
||||
# Computation of incidence matrices
|
||||
incidence_matrices <- lapply(
|
||||
seq_ids_network_aggreg,
|
||||
function(m) {
|
||||
current_interaction_data <- interaction_data[which(interaction_data$id_network_aggreg == m), ] %>%
|
||||
mutate(
|
||||
plantaggreg = paste(plantorder,
|
||||
plantfamily, plantgenus, plantspecies,
|
||||
sep = "-"
|
||||
),
|
||||
insectaggreg = paste(insectorder,
|
||||
insectfamily, insectgenus, insectspecies,
|
||||
sep = "-"
|
||||
)
|
||||
)
|
||||
current_interaction_data <- table(current_interaction_data$plantaggreg, current_interaction_data$insectaggreg)
|
||||
|
||||
current_incidence_matrix <- matrix(current_interaction_data,
|
||||
ncol = ncol(current_interaction_data), dimnames = dimnames(current_interaction_data)
|
||||
)
|
||||
|
||||
current_incidence_matrix[which(current_incidence_matrix > 0)] <- 1
|
||||
return(current_incidence_matrix)
|
||||
}
|
||||
)
|
||||
|
||||
names(incidence_matrices) <- names_aggreg_networks
|
||||
|
||||
min_sizes <- lapply(min_vec, function(file) readRDS(file))
|
||||
nb_networks <- sapply(min_sizes, function(min_size) {
|
||||
length(incidence_matrices[
|
||||
(colSums(sapply(incidence_matrices, dim) > min_size) == 2L)
|
||||
])
|
||||
})
|
||||
min_sizes_df <- do.call(rbind.data.frame, min_sizes)
|
||||
min_sizes_df <- cbind(min_sizes_df, nb_networks)
|
||||
colnames(min_sizes_df) <- c("nr >", "nc >", "nb_networks")
|
||||
|
||||
list_clustering <- files_vec
|
||||
# list_collection <- readRDS(file.path(data_folder, "dore_collection_iid_24-05-24_18-07-50.Rds"))
|
||||
```
|
||||
|
||||
Les données ont été filtrées pour retenir les réseaux de taille supérieure à
|
||||
celles du tableau ci-dessous, aboutissant au nombre de réseaux de la colonne de
|
||||
droite :
|
||||
`r knitr::kable(min_sizes_df, caption = "Table des tailles minimales retenues par modèle")`
|
||||
|
||||
## Analyse par modèle
|
||||
|
||||
Les clustering donne le critère suivant :
|
||||
```{r}
|
||||
vec_bicl <- sapply(list_clustering, function(clustering) {
|
||||
list_collection <- readRDS(clustering)
|
||||
unlisted_best_partition <- unlist(
|
||||
extract_best_bipartite_partition(list_collection)
|
||||
)
|
||||
BICL <- sum(sapply(unlisted_best_partition, function(col) col$BICL))
|
||||
BICL
|
||||
})
|
||||
knitr::kable(vec_bicl, caption = "BICL par modèle", col.names = "$BICL$", row.names = TRUE)
|
||||
```
|
||||
|
||||
:::{.panel-tabset}
|
||||
|
||||
```{r write_tabs}
|
||||
#| warning: false
|
||||
#| output: asis
|
||||
|
||||
# Generate content for each model using knit_expand
|
||||
for (clustering_idx in seq_len(length(list_clustering))) {
|
||||
clustering <- list_clustering[clustering_idx]
|
||||
model <- names(clustering)
|
||||
expanded_content <- knitr::knit_expand(file = file.path(root_app_folder, "base_analysis.qmd"), clustering = clustering, model = model)
|
||||
res <- knitr::knit_child(text = expanded_content, quiet = TRUE)
|
||||
cat(res, sep = "\n")
|
||||
cat("\n")
|
||||
}
|
||||
```
|
||||
|
||||
:::
|
||||
|
|
@ -0,0 +1,156 @@
|
|||
---
|
||||
format: html
|
||||
title: Clustering avec `colSBM` des données Doré et al. seulement sur les larges réseaux
|
||||
execute:
|
||||
echo: false
|
||||
warning: false
|
||||
---
|
||||
|
||||
|
||||
# Analyse
|
||||
|
||||
|
||||
```{r}
|
||||
library(colSBM)
|
||||
library(here)
|
||||
library(dplyr)
|
||||
library(stringr)
|
||||
```
|
||||
|
||||
```{r}
|
||||
root_app_folder <- file.path(here(), "code", "applications")
|
||||
source(file.path(root_app_folder, "utils.R"))
|
||||
data_folder <- file.path(here(), "code", "results", "applications", "dore_no_small")
|
||||
|
||||
# Full data
|
||||
list_full_files <- file.path(
|
||||
data_folder,
|
||||
list.files(data_folder,
|
||||
pattern = "full"
|
||||
)
|
||||
)
|
||||
names(list_full_files) <- stringr::str_extract(
|
||||
string = list_full_files,
|
||||
pattern = "(iid|pirho|pi|rho)"
|
||||
)
|
||||
|
||||
lapply(seq_len(length(list_full_files)), function(idx) {
|
||||
min_size <- readRDS(list_full_files[[idx]])[[1]]
|
||||
clustering <- readRDS(list_full_files[[idx]])[[2]]
|
||||
|
||||
min_filepath <- gsub("full_", "min_size_", list_full_files[[idx]])
|
||||
clustering_filepath <- gsub("full_", "clustering_", list_full_files[[idx]])
|
||||
|
||||
saveRDS(min_size, min_filepath)
|
||||
saveRDS(clustering, clustering_filepath)
|
||||
})
|
||||
|
||||
```
|
||||
|
||||
```{r}
|
||||
#| output: false
|
||||
files_vec <- get_recent_files(data_folder, pattern = "clustering")
|
||||
min_vec <- get_recent_files(data_folder, pattern = "min")
|
||||
files_vec <- identify_models(files_vec)
|
||||
min_vec <- identify_models(min_vec)
|
||||
|
||||
# Importing data
|
||||
base_data_folder <- file.path(here(), "code", "data", "dore")
|
||||
|
||||
interaction_data <- read.table(file = file.path(base_data_folder, "interaction-data.txt"), sep = "\t", header = TRUE)
|
||||
|
||||
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
|
||||
names_aggreg_networks <- sapply(
|
||||
seq_ids_network_aggreg,
|
||||
function(id) {
|
||||
paste0(
|
||||
unique(interaction_data[which(interaction_data$id_network_aggreg == id), ]$web),
|
||||
collapse = "+"
|
||||
)
|
||||
}
|
||||
)
|
||||
# Computation of incidence matrices
|
||||
incidence_matrices <- lapply(
|
||||
seq_ids_network_aggreg,
|
||||
function(m) {
|
||||
current_interaction_data <- interaction_data[which(interaction_data$id_network_aggreg == m), ] %>%
|
||||
mutate(
|
||||
plantaggreg = paste(plantorder,
|
||||
plantfamily, plantgenus, plantspecies,
|
||||
sep = "-"
|
||||
),
|
||||
insectaggreg = paste(insectorder,
|
||||
insectfamily, insectgenus, insectspecies,
|
||||
sep = "-"
|
||||
)
|
||||
)
|
||||
current_interaction_data <- table(current_interaction_data$plantaggreg, current_interaction_data$insectaggreg)
|
||||
|
||||
current_incidence_matrix <- matrix(current_interaction_data,
|
||||
ncol = ncol(current_interaction_data), dimnames = dimnames(current_interaction_data)
|
||||
)
|
||||
|
||||
current_incidence_matrix[which(current_incidence_matrix > 0)] <- 1
|
||||
return(current_incidence_matrix)
|
||||
}
|
||||
)
|
||||
|
||||
names(incidence_matrices) <- names_aggreg_networks
|
||||
|
||||
min_sizes <- lapply(min_vec, function(file) readRDS(file))
|
||||
nb_networks <- sapply(min_sizes, function(min_size) {
|
||||
length(incidence_matrices[
|
||||
(colSums(sapply(incidence_matrices, dim) > min_size) == 2L)
|
||||
])
|
||||
})
|
||||
min_sizes_df <- do.call(rbind.data.frame, min_sizes)
|
||||
min_sizes_df <- cbind(min_sizes_df, nb_networks)
|
||||
colnames(min_sizes_df) <- c("nr >", "nc >", "nb_networks")
|
||||
|
||||
list_clustering <- files_vec
|
||||
# list_collection <- readRDS(file.path(data_folder, "dore_collection_iid_24-05-24_18-07-50.Rds"))
|
||||
```
|
||||
|
||||
|
||||
Les données ont été filtrées pour retenir les réseaux de taille supérieure à
|
||||
celles du tableau ci-dessous, aboutissant au nombre de réseaux de la colonne de
|
||||
droite :
|
||||
`r knitr::kable(min_sizes_df, caption = "Table des tailles minimales retenues par modèle")`
|
||||
|
||||
## Analyse par modèle
|
||||
|
||||
Les clustering donne le critère suivant :
|
||||
|
||||
```{r}
|
||||
vec_bicl <- sapply(list_clustering, function(clustering) {
|
||||
list_collection <- readRDS(clustering)
|
||||
unlisted_best_partition <- unlist(
|
||||
extract_best_bipartite_partition(list_collection)
|
||||
)
|
||||
BICL <- sum(sapply(unlisted_best_partition, function(col) col$BICL))
|
||||
BICL
|
||||
})
|
||||
knitr::kable(vec_bicl, caption = "BICL par modèle", col.names = "$BICL$", row.names = TRUE)
|
||||
```
|
||||
|
||||
|
||||
:::{.panel-tabset}
|
||||
|
||||
|
||||
```{r write_tabs}
|
||||
#| warning: false
|
||||
#| output: asis
|
||||
|
||||
# Generate content for each model using knit_expand
|
||||
for (clustering_idx in seq_len(length(list_clustering))) {
|
||||
clustering <- list_clustering[clustering_idx]
|
||||
model <- names(clustering)
|
||||
expanded_content <- knitr::knit_expand(file = file.path(root_app_folder, "base_analysis.qmd"), clustering = clustering, model = model)
|
||||
res <- knitr::knit_child(text = expanded_content, quiet = TRUE)
|
||||
cat(res, sep = "\n")
|
||||
cat("\n")
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
:::
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -1,115 +1,59 @@
|
|||
---
|
||||
title: Analysis of herbivores data
|
||||
format: html
|
||||
title: Clustering avec `colSBM` des données herbivores
|
||||
execute:
|
||||
echo: false
|
||||
freeze: auto
|
||||
warning: false
|
||||
---
|
||||
|
||||
```{r import_data}
|
||||
# Analyse
|
||||
|
||||
```{r}
|
||||
library(colSBM)
|
||||
library(here)
|
||||
library(stringr)
|
||||
```
|
||||
|
||||
```{r}
|
||||
root_app_folder <- file.path(here(), "code", "applications")
|
||||
source(file.path(root_app_folder, "utils.R"))
|
||||
data_folder <- file.path(here(), "code", "results", "applications", "herbivores")
|
||||
files_vec <- get_recent_files(data_folder)
|
||||
filepath <- files_vec[1]
|
||||
list_collection <- readRDS(filepath)
|
||||
files_vec <- identify_models(files_vec)
|
||||
list_clustering <- files_vec
|
||||
# list_collection <- readRDS(file.path(data_folder, "dore_collection_iid_24-05-24_18-07-50.Rds"))
|
||||
```
|
||||
|
||||
```{r libs_and_data}
|
||||
library(here)
|
||||
## Analyse par modèle
|
||||
|
||||
library(colSBM)
|
||||
|
||||
library(dplyr)
|
||||
library(tidyr)
|
||||
|
||||
library(plotly)
|
||||
library(ggplot2)
|
||||
library(ggforce)
|
||||
library(ggrepel)
|
||||
library(latex2exp)
|
||||
```
|
||||
```{r dataformatting}
|
||||
unlisted_best_partition <- unlist(extract_best_bipartite_partition(list_collection))
|
||||
|
||||
graph_size_df <- build_graph_size_dataframe(unlisted_best_partition)
|
||||
summarized_graph_size_df <- graph_size_df %>%
|
||||
group_by(collection_id) %>%
|
||||
select(-net_id) %>%
|
||||
summarise(
|
||||
M = n(),
|
||||
nr_mean = mean(nr),
|
||||
nr_sd = ifelse(length(nr) > 1, sd(nr), 0),
|
||||
nc_mean = mean(nc),
|
||||
nc_sd = ifelse(length(nc) > 1, sd(nc), 0),
|
||||
Qr = first(Qr),
|
||||
Qc = first(Qc)
|
||||
Les clustering donne le critère suivant :
|
||||
```{r}
|
||||
vec_bicl <- sapply(list_clustering, function(clustering) {
|
||||
list_collection <- readRDS(clustering)
|
||||
unlisted_best_partition <- unlist(
|
||||
extract_best_bipartite_partition(list_collection)
|
||||
)
|
||||
```
|
||||
### Analyse des tailles de cluster
|
||||
```{r collection_blocks_plot_size}
|
||||
#| layout-ncol: 2
|
||||
#| plotly: true
|
||||
filtered_summarized_graph_size_df <- summarized_graph_size_df
|
||||
ggplot(filtered_summarized_graph_size_df) +
|
||||
aes(x = Qc, y = Qr, color = collection_id, label = M) +
|
||||
geom_point() + # , position = position_jitter(h = 0.15, w = 0.05)
|
||||
geom_text_repel() +
|
||||
xlab(TeX("$Q_c$")) +
|
||||
ylab(TeX("$Q_r$")) +
|
||||
coord_fixed()
|
||||
|
||||
plot_ly(x = filtered_summarized_graph_size_df$Qc,
|
||||
y = filtered_summarized_graph_size_df$Qr,
|
||||
z = filtered_summarized_graph_size_df$M,
|
||||
type = "scatter3d", mode = "markers",
|
||||
color = filtered_summarized_graph_size_df$collection_id) %>%
|
||||
layout(scene = list(xaxis = list(title = "Q_c"),
|
||||
yaxis = list(title = "Q_r")))
|
||||
BICL <- sum(sapply(unlisted_best_partition, function(col) col$BICL))
|
||||
BICL
|
||||
})
|
||||
knitr::kable(vec_bicl, caption = "BICL par modèle", col.names = "$BICL$", row.names = TRUE)
|
||||
```
|
||||
|
||||
### Analyse des tailles de réseaux
|
||||
:::{.panel-tabset}
|
||||
|
||||
```{r size_plot}
|
||||
#| layout-ncol: 2
|
||||
ggplot(filtered_summarized_graph_size_df) +
|
||||
aes(x = nc_mean, y = nr_mean, color = collection_id) +
|
||||
geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
|
||||
geom_ellipse(aes(
|
||||
x0 = nc_mean, y0 = nr_mean,
|
||||
a = 0.5 * nc_sd,
|
||||
b = 0.5 * nr_sd,
|
||||
fill = collection_id, angle = 0
|
||||
), alpha = 0.1) +
|
||||
geom_text_repel(aes(label = collection_id)) +
|
||||
xlab(TeX("$n_c$")) +
|
||||
ylab(TeX("$n_r$")) +
|
||||
coord_fixed() +
|
||||
ggtitle("Distribution des tailles moyennes et écart-types")
|
||||
```{r write_tabs}
|
||||
#| warning: false
|
||||
#| output: asis
|
||||
|
||||
max_nr <- 200
|
||||
max_nc <- 200
|
||||
|
||||
ggplot(filtered_summarized_graph_size_df %>% filter(nr_mean <= max_nr, nc_mean <= max_nc)) +
|
||||
aes(x = nc_mean, y = nr_mean, color = collection_id) +
|
||||
geom_point(size = 4) + # , position = position_jitter(h = 0.15, w = 0.05)
|
||||
geom_ellipse(aes(
|
||||
x0 = nc_mean, y0 = nr_mean,
|
||||
a = 0.5 * nc_sd,
|
||||
b = 0.5 * nr_sd,
|
||||
fill = collection_id, angle = 0
|
||||
), alpha = 0.1) +
|
||||
geom_text_repel(aes(label = collection_id)) +
|
||||
xlab(TeX("$n_c$")) +
|
||||
ylab(TeX("$n_r$")) +
|
||||
coord_fixed() +
|
||||
ggtitle(TeX(paste0("Distribution des tailles moyennes et écart-types,", "$n_r <=", max_nr, "$", " et ", "$n_c <=", max_nc, "$")))
|
||||
|
||||
filtered_graph_size <- graph_size_df %>% filter(M > 1)
|
||||
ggplot(filtered_graph_size) +
|
||||
aes(x = collection_id, y = nr, fill = collection_id) +
|
||||
geom_boxplot()
|
||||
ggplot(filtered_graph_size) +
|
||||
aes(x = collection_id, y = nc, fill = collection_id) +
|
||||
geom_boxplot()
|
||||
# Generate content for each model using knit_expand
|
||||
for (clustering_idx in seq_len(length(list_clustering))) {
|
||||
clustering <- list_clustering[clustering_idx]
|
||||
model <- names(clustering)
|
||||
expanded_content <- knitr::knit_expand(file = file.path(root_app_folder, "base_analysis.qmd"), clustering = clustering, model = model)
|
||||
res <- knitr::knit_child(text = expanded_content, quiet = TRUE)
|
||||
cat(res, sep = "\n")
|
||||
cat("\n")
|
||||
}
|
||||
```
|
||||
|
||||
:::
|
||||
|
|
@ -7,12 +7,12 @@
|
|||
#' warning if the number wanted `n` is larger than the number of files.
|
||||
#'
|
||||
#' @return A vector of size `n` with the file path.
|
||||
get_recent_files <- function(data_folder, n = 4) {
|
||||
get_recent_files <- function(data_folder, n = 4, pattern = NULL) {
|
||||
files_info <- file.info(file.path(data_folder, list.files(data_folder,
|
||||
include.dirs = FALSE, pattern = "*dore_collection_[a-z]*_seed"
|
||||
include.dirs = FALSE, pattern = pattern
|
||||
)))
|
||||
files_info[["filepath"]] <- file.path(data_folder, list.files(data_folder,
|
||||
include.dirs = FALSE, pattern = "*dore_collection_[a-z]*_seed"
|
||||
include.dirs = FALSE, pattern = pattern
|
||||
))
|
||||
files_info <- sort_by(files_info, files_info[["ctime"]], decreasing = TRUE)
|
||||
if (n > nrow(files_info)) {
|
||||
|
|
@ -48,3 +48,15 @@ build_graph_size_dataframe <- function(collection_list) {
|
|||
)
|
||||
}))
|
||||
}
|
||||
|
||||
extract_clustering <- function(clustering) {
|
||||
partition <- colSBM::extract_best_bipartite_partition(
|
||||
l = clustering,
|
||||
unnest = TRUE
|
||||
)
|
||||
unlist(sapply(seq_len(length(partition)), function(idx) {
|
||||
clust_vec <- rep(idx, partition[[idx]][["M"]])
|
||||
names(clust_vec) <- partition[[idx]][["net_id"]]
|
||||
clust_vec
|
||||
}))
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Add table
Reference in a new issue