526 lines
16 KiB
Text
526 lines
16 KiB
Text
```{r, setup, include=FALSE, warning=FALSE}
|
|
knitr::opts_chunk$set(echo = FALSE)
|
|
```
|
|
|
|
```{r require_lib, echo = FALSE, include=FALSE, warning=FALSE}
|
|
require("tidyverse")
|
|
require("knitr")
|
|
require("colSBM")
|
|
source("temporary_plot.R")
|
|
```
|
|
|
|
```{r pretty_matrix_print, echo = FALSE, warning=FALSE}
|
|
# Define a generic method that transforms an object x in a LaTeX string
|
|
as_latex <- function(x, ...) {
|
|
UseMethod("as_latex", x)
|
|
}
|
|
|
|
# Define a class latex for LaTeX expressions
|
|
as_latex.character <- function(x) {
|
|
structure(
|
|
paste(x, collapse = " "),
|
|
class = c("latex", "character")
|
|
)
|
|
}
|
|
|
|
# A character string of class latex is rendered in display mode
|
|
# Define a knit_print() method for the latex class
|
|
knit_print.latex <- function(x, ...) {
|
|
knitr::asis_output(
|
|
paste0("$$", x, "$$")
|
|
)
|
|
}
|
|
|
|
# Now, define a method as_latex for matrix
|
|
as_latex.matrix <- function(x, ...) {
|
|
as_latex(c(
|
|
"\\begin{pmatrix}",
|
|
paste(
|
|
t(x),
|
|
rep(c(rep("&", nrow(x) - 1), "\\\\"), ncol(x)),
|
|
collapse = ""
|
|
),
|
|
"\\end{pmatrix}"
|
|
))
|
|
}
|
|
|
|
# Indicate to knitr that matrix are rendered as latex
|
|
knit_print.matrix <- function(x, ...) {
|
|
knitr::knit_print(as_latex(round(x, 2)))
|
|
}
|
|
```
|
|
|
|
```{r better_collection_extraction, echo = FALSE, warning=FALSE}
|
|
extract_unlist_reorder <- function(clustering_data_path) {
|
|
clustering <- readRDS(clustering_data_path)
|
|
if (!is.list(clustering)) {
|
|
clustering <- list(clustering)
|
|
}
|
|
best_partition <- extract_best_bipartite_partition(clustering)
|
|
best_partition_unlist <- unlist(best_partition)
|
|
if (!is.list(best_partition_unlist)) {
|
|
best_partition_unlist <- list(best_partition_unlist)
|
|
}
|
|
size <- length(best_partition_unlist)
|
|
return(setNames(lapply(best_partition_unlist, function(collection) {
|
|
reorder_parameters(collection)
|
|
}), paste(rep("Collection", size), seq_len(size))))
|
|
}
|
|
|
|
meso_print <- function(unlisted_partition) {
|
|
for (idx in seq_along(unlisted_partition)) {
|
|
cat(paste("\\subsubsection{Pour la collection", idx, "}"))
|
|
print(plot(unlisted_partition[[idx]], type = "meso", mixture = TRUE))
|
|
cat("\\newline \\tiny")
|
|
print(knitr::kable(unlisted_partition[[idx]]$net_id,
|
|
col.names = "Networks",
|
|
format = "latex",
|
|
position = "!h",
|
|
booktabs = TRUE
|
|
))
|
|
cat("\\normalsize\\newline")
|
|
cat(knitr::knit_print(unlisted_partition[[idx]]$alpha))
|
|
}
|
|
}
|
|
|
|
alpha_print <- function(unlisted_partition) {
|
|
for (idx in seq_along(unlisted_partition)) {
|
|
cat(paste("\nPour la collection ", idx, ":\n"))
|
|
cat(knitr::knit_print(unlisted_partition[[idx]]$alpha))
|
|
}
|
|
}
|
|
```
|
|
|
|
```{r taxonomy_functions, echo = FALSE, warning=FALSE}
|
|
interaction_data <- read.table(file = "data/interaction-data.txt", sep = "\t", header = TRUE)
|
|
|
|
insect_orders <- unique(interaction_data$insectorder)
|
|
plant_family <- unique(interaction_data$plantorder)
|
|
|
|
insect_orders[is.na(insect_orders)] <- "NA"
|
|
plant_family[is.na(plant_family)] <- "NA"
|
|
### Matching taxonomy
|
|
taxonomy_in_clusters <- function(unlisted_model) {
|
|
if (is.list(unlisted_model)) {
|
|
lapply(seq_len(length(unlisted_model)), function(col_idx) {
|
|
# Per collection
|
|
# Empty init
|
|
insect_count <- t(sapply(insect_orders, function(order) {
|
|
out_count <- rep(0, unlisted_model[[col_idx]]$Q[2])
|
|
out_count
|
|
}))
|
|
|
|
plant_count <- t(sapply(plant_family, function(order) {
|
|
out_count <- rep(0, unlisted_model[[col_idx]]$Q[1])
|
|
out_count
|
|
}))
|
|
|
|
for (m in seq.int(unlisted_model[[col_idx]]$M)) {
|
|
#### Insect
|
|
insect_names <- names(unlisted_model[[col_idx]]$Z[[1]][[2]])
|
|
|
|
insect_count <- insect_count + t(sapply(insect_orders, function(order) {
|
|
out_count <- rep(0, unlisted_model[[col_idx]]$Q[2])
|
|
names(out_count) <- seq.int(unlisted_model[[col_idx]]$Q[2])
|
|
insect_count <- table(unlisted_model[[col_idx]]$Z[[m]][[2]][grep(order, insect_names)])
|
|
out_count[names(insect_count)] <- insect_count
|
|
out_count
|
|
}))
|
|
#### Plants
|
|
plant_names <- names(unlisted_model[[col_idx]]$Z[[1]][[1]])
|
|
|
|
plant_count <- t(sapply(plant_family, function(order) {
|
|
out_count <- rep(0, unlisted_model[[col_idx]]$Q[1])
|
|
names(out_count) <- seq.int(unlisted_model[[col_idx]]$Q[1])
|
|
plant_count <- table(unlisted_model[[col_idx]]$Z[[m]][[1]][grep(order, plant_names)])
|
|
out_count[names(plant_count)] <- plant_count
|
|
out_count
|
|
}))
|
|
}
|
|
return(list(insects = insect_count, plants = plant_count))
|
|
})
|
|
} else {
|
|
# Per collection
|
|
# Empty init
|
|
insect_count <- t(sapply(insect_orders, function(order) {
|
|
out_count <- rep(0, unlisted_model$Q[2])
|
|
out_count
|
|
}))
|
|
|
|
plant_count <- t(sapply(plant_family, function(order) {
|
|
out_count <- rep(0, unlisted_model$Q[1])
|
|
out_count
|
|
}))
|
|
|
|
for (m in seq.int(unlisted_model$M)) {
|
|
#### Insect
|
|
insect_names <- names(unlisted_model$Z[[1]][[2]])
|
|
|
|
insect_count <- insect_count + t(sapply(insect_orders, function(order) {
|
|
out_count <- rep(0, unlisted_model$Q[2])
|
|
names(out_count) <- seq.int(unlisted_model$Q[2])
|
|
insect_count <- table(unlisted_model$Z[[m]][[2]][grep(order, insect_names)])
|
|
out_count[names(insect_count)] <- insect_count
|
|
out_count
|
|
}))
|
|
#### Plants
|
|
plant_names <- names(unlisted_model$Z[[1]][[1]])
|
|
|
|
plant_count <- t(sapply(plant_family, function(order) {
|
|
out_count <- rep(0, unlisted_model$Q[1])
|
|
names(out_count) <- seq.int(unlisted_model$Q[1])
|
|
plant_count <- table(unlisted_model$Z[[m]][[1]][grep(order, plant_names)])
|
|
out_count[names(plant_count)] <- plant_count
|
|
out_count
|
|
}))
|
|
}
|
|
return(list(list(insects = insect_count, plants = plant_count)))
|
|
}
|
|
}
|
|
|
|
taxonomy_remove_empty <- function(taxonomy_collections_list) {
|
|
lapply(taxonomy_collections_list, function(collection) {
|
|
list(
|
|
insects = collection$insects[which(rowSums(collection$insects != 0) > 0), ],
|
|
plants = collection$plants[which(rowSums(collection$plants != 0) > 0), ]
|
|
)
|
|
})
|
|
}
|
|
|
|
get_formatted_data <- function(collection, group, max_rank = 6) {
|
|
collection[[group]] %>%
|
|
as.data.frame() %>% # Transformation en data frame
|
|
mutate(
|
|
Total = rowSums(.),
|
|
Rang = rank(-Total, ties.method = "min")
|
|
) %>% # Creation d'une colonne Total
|
|
rownames_to_column(var = "TaxonBrut") %>%
|
|
mutate(Taxon = ifelse(Rang <= max_rank & Total > 0, TaxonBrut, "Other")) %>%
|
|
arrange(Rang) %>%
|
|
mutate(Taxon = factor(Taxon, levels = unique(Taxon))) %>%
|
|
select(-Total, -TaxonBrut, -Rang) %>%
|
|
pivot_longer(
|
|
cols = -c("Taxon"),
|
|
names_to = "Bloc",
|
|
values_to = "Nombre",
|
|
names_prefix = "V"
|
|
) %>%
|
|
group_by(Taxon, Bloc) %>%
|
|
summarise_all(sum) %>%
|
|
ungroup() %>%
|
|
group_by(Bloc) %>%
|
|
mutate(Proportion = Nombre / sum(Nombre)) %>%
|
|
ungroup() %>%
|
|
mutate(Group = group)
|
|
}
|
|
|
|
taxonomy_plot <- function(data, insects_or_plants, model, stack_or_fill) {
|
|
plots <- filter(data, Group == insects_or_plants) %>%
|
|
ggplot(aes(x = Bloc, y = Nombre, fill = Taxon)) +
|
|
geom_bar(stat = "identity", position = stack_or_fill) +
|
|
labs(x = "Block", y = "Number of Nodes", fill = "Taxonomy") +
|
|
theme_minimal() +
|
|
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
|
|
ggtitle(paste(
|
|
ifelse(insects_or_plants == "insects",
|
|
"Pollinators", "Plants"
|
|
), "repartition (",
|
|
ifelse(stack_or_fill == "stack", "absolute", "proportion"),
|
|
") in the", model, "clustering"
|
|
)) +
|
|
facet_wrap(~Collection, ncol = 3, scales = "free_x")
|
|
|
|
# Arrange the plots in a grid layout
|
|
gridExtra::grid.arrange(plots, newpage = TRUE)
|
|
}
|
|
```
|
|
|
|
```{r load data, echo = FALSE, include = FALSE, warning=FALSE}
|
|
# All results
|
|
iid_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_iid_123networks_24-05-23-21:40:42.Rds")
|
|
|
|
rho_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_rho_123networks_25-05-23-13:58:30.Rds")
|
|
|
|
pi_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_pi_123networks_25-05-23-17:31:25.Rds")
|
|
|
|
pirho_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_pirho_123networks_26-05-23-19:22:55.Rds")
|
|
```
|
|
# Application to \cite{doreRelativeEffectsAnthropogenic2021} data
|
|
\label{sec:application-to-dorerelativeeffectsanthropogenic2021-data}
|
|
|
|
## Clustering with model iid
|
|
With the *iid-colBiSBM* we obtain `r length(iid_unlist)` collections with the
|
|
following structures:
|
|
|
|
```{r iid_meso_plot, echo = FALSE, message=FALSE, results="asis", warning=FALSE}
|
|
#| fig.cap=paste(names(iid_unlist), rep("- iid",length(iid_unlist))),
|
|
#| fig.asp = 0.5,
|
|
#| dpi = 300
|
|
meso_print(iid_unlist)
|
|
```
|
|
|
|
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
|
|
|
|
```{r iid_alpha, echo = FALSE, results="asis", warning=FALSE}
|
|
alpha_print(iid_unlist)
|
|
```
|
|
### Comparaison avec des infos supplémentaires
|
|
```{r supinfo, echo = FALSE}
|
|
supinfo <- readxl::read_xlsx("data/supinfo.xlsx", sheet = 2)
|
|
interaction_data <- read.table(file = "data/interaction-data.txt", sep = "\t", header = TRUE)
|
|
|
|
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
|
|
incidence_matrices <- readRDS(file = "data/dore-matrices.Rds")
|
|
names_aggreg_networks <- names(incidence_matrices)
|
|
vectorClusteringNet <- numeric(nrow(supinfo))
|
|
for (k in 1:length(iid_unlist)) {
|
|
idclust <- match(iid_unlist[[k]]$net_id, names_aggreg_networks)
|
|
supinfoclust <- match(seq_ids_network_aggreg[idclust], supinfo$Idweb)
|
|
vectorClusteringNet[supinfoclust] <- k
|
|
}
|
|
```
|
|
|
|
```{r Annual_timespan_plot, echo = FALSE}
|
|
ggplot(supinfo) +
|
|
aes(
|
|
y = Annual_time_span,
|
|
x = vectorClusteringNet, group = vectorClusteringNet, fill = as.factor(vectorClusteringNet)
|
|
) +
|
|
xlab("Numéro de collection") +
|
|
ylab("Annual time span") +
|
|
guides(fill = guide_legend(title = "Numéro\nde collection")) +
|
|
geom_boxplot()
|
|
```
|
|
|
|
### Répartition dans les clusters selon la taxonomie
|
|
```{r iid_taxonomy, echo = FALSE, warning=FALSE}
|
|
iid_taxonomy <- taxonomy_in_clusters(iid_unlist)
|
|
|
|
iid_taxonomy_long <- map_dfr(iid_taxonomy,
|
|
function(collection) {
|
|
map_dfr(
|
|
c("insects", "plants"),
|
|
function(group) {
|
|
get_formatted_data(collection, group)
|
|
}
|
|
)
|
|
},
|
|
.id = "Collection"
|
|
)
|
|
```
|
|
|
|
```{r iid_plot_taxonomy_pollinators, echo = FALSE, message = FALSE,fig.cap = 'Pollinators repartition for the iid model regarding taxonomy', warning=FALSE}
|
|
# Pollinators
|
|
taxonomy_plot(
|
|
data = iid_taxonomy_long,
|
|
insects_or_plants = "insects",
|
|
stack_or_fill = "stack", model = "iid"
|
|
)
|
|
taxonomy_plot(
|
|
data = iid_taxonomy_long,
|
|
insects_or_plants = "insects",
|
|
stack_or_fill = "fill", model = "iid"
|
|
)
|
|
```
|
|
|
|
```{r iid_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the iid model regarding taxonomy', warning=FALSE}
|
|
taxonomy_plot(
|
|
data = iid_taxonomy_long,
|
|
insects_or_plants = "plants",
|
|
stack_or_fill = "stack", model = "iid"
|
|
)
|
|
taxonomy_plot(
|
|
data = iid_taxonomy_long,
|
|
insects_or_plants = "plants",
|
|
stack_or_fill = "fill", model = "iid"
|
|
)
|
|
```
|
|
|
|
#### Tables
|
|
```{r iid_taxo_tables, echo = FALSE}
|
|
iid_taxonomy_long %>%
|
|
filter(Group == "insects") %>%
|
|
group_by(Taxon) %>%
|
|
mutate(row_id = row_number()) %>%
|
|
pivot_wider(names_from = c(Collection, Bloc), values_from = Nombre, names_glue = "Collection_{Collection}_Bloc_{Bloc}") %>%
|
|
ungroup() %>%
|
|
select(-row_id) %>%
|
|
group_by(Taxon) %>%
|
|
summarize(across(starts_with("Collection"), ~ na.omit(.)[1])) %>%
|
|
ungroup()
|
|
```
|
|
|
|
|
|
## Clustering with model pi
|
|
Avec le modèle *pi* nous obtenons les `r length(pi_unlist)` collections et
|
|
les structures suivantes:
|
|
|
|
```{r pi_meso_plot, echo = FALSE, message=FALSE, fig.cap=paste(names(pi_unlist), rep("- pi",length(pi_unlist))), results="asis", warning=FALSE}
|
|
meso_print(pi_unlist)
|
|
```
|
|
|
|
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
|
|
|
|
```{r pi_alpha, echo = FALSE, results="asis", warning=FALSE}
|
|
alpha_print(pi_unlist)
|
|
```
|
|
|
|
### Répartition dans les clusters selon la taxonomie
|
|
```{r pi_taxonomy, echo = FALSE, warning=FALSE}
|
|
pi_taxonomy <- taxonomy_in_clusters(pi_unlist)
|
|
|
|
pi_taxonomy_long <- map_dfr(pi_taxonomy,
|
|
function(collection) {
|
|
map_dfr(
|
|
c("insects", "plants"),
|
|
function(group) {
|
|
get_formatted_data(collection, group)
|
|
}
|
|
)
|
|
},
|
|
.id = "Collection"
|
|
)
|
|
```
|
|
|
|
```{r pi_plot_taxonomy_pollinators, echo = FALSE, message = FALSE,fig.cap = 'Pollinators repartition for the pi model regarding taxonomy', warning=FALSE}
|
|
# Pollinators
|
|
taxonomy_plot(
|
|
data = pi_taxonomy_long,
|
|
insects_or_plants = "insects",
|
|
stack_or_fill = "stack", model = "pi"
|
|
)
|
|
taxonomy_plot(
|
|
data = pi_taxonomy_long,
|
|
insects_or_plants = "insects",
|
|
stack_or_fill = "fill", model = "pi"
|
|
)
|
|
```
|
|
|
|
```{r pi_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the pi model regarding taxonomy', warning=FALSE}
|
|
taxonomy_plot(
|
|
data = pi_taxonomy_long,
|
|
insects_or_plants = "plants",
|
|
stack_or_fill = "stack", model = "pi"
|
|
)
|
|
taxonomy_plot(
|
|
data = pi_taxonomy_long,
|
|
insects_or_plants = "plants",
|
|
stack_or_fill = "fill", model = "pi"
|
|
)
|
|
```
|
|
|
|
## Clustering with model rho
|
|
Avec le modèle *rho* nous obtenons les `r length(rho_unlist)` collections et
|
|
les structures suivantes:
|
|
|
|
```{r rho_meso_plot, echo = FALSE, message=FALSE, fig.cap=paste(names(rho_unlist), rep("- rho",length(rho_unlist))), results="asis", warning=FALSE}
|
|
meso_print(rho_unlist)
|
|
```
|
|
|
|
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
|
|
|
|
```{r rho_alpha, echo = FALSE, results="asis", warning=FALSE}
|
|
alpha_print(rho_unlist)
|
|
```
|
|
|
|
### Répartition dans les clusters selon la taxonomie
|
|
```{r rho_taxonomy, echo = FALSE, warning=FALSE}
|
|
rho_taxonomy <- taxonomy_in_clusters(rho_unlist)
|
|
|
|
rho_taxonomy_long <- map_dfr(rho_taxonomy,
|
|
function(collection) {
|
|
map_dfr(
|
|
c("insects", "plants"),
|
|
function(group) {
|
|
get_formatted_data(collection, group)
|
|
}
|
|
)
|
|
},
|
|
.id = "Collection"
|
|
)
|
|
```
|
|
|
|
```{r rho_plot_taxonomy_pollinators, echo = FALSE, message = FALSE,fig.cap = 'Pollinators repartition for the rho model regarding taxonomy', warning=FALSE}
|
|
# Pollinators
|
|
taxonomy_plot(
|
|
data = rho_taxonomy_long,
|
|
insects_or_plants = "insects",
|
|
stack_or_fill = "stack", model = "rho"
|
|
)
|
|
taxonomy_plot(
|
|
data = rho_taxonomy_long,
|
|
insects_or_plants = "insects",
|
|
stack_or_fill = "fill", model = "rho"
|
|
)
|
|
```
|
|
|
|
```{r rho_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the rho model regarding taxonomy', warning=FALSE}
|
|
taxonomy_plot(
|
|
data = rho_taxonomy_long,
|
|
insects_or_plants = "plants",
|
|
stack_or_fill = "stack", model = "rho"
|
|
)
|
|
taxonomy_plot(
|
|
data = rho_taxonomy_long,
|
|
insects_or_plants = "plants",
|
|
stack_or_fill = "fill", model = "rho"
|
|
)
|
|
```
|
|
|
|
## Clustering with model pirho
|
|
Avec le modèle *pirho* nous obtenons les `r length(pirho_unlist)` collections et
|
|
les structures suivantes:
|
|
|
|
```{r pirho_meso_plot, echo = FALSE, message=FALSE, fig.cap=paste(names(pirho_unlist), rep("- pirho",length(pirho_unlist))), results="asis", warning=FALSE}
|
|
meso_print(pirho_unlist)
|
|
```
|
|
|
|
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
|
|
|
|
```{r pirho_alpha, echo = FALSE, results="asis", warning=FALSE}
|
|
alpha_print(pirho_unlist)
|
|
```
|
|
|
|
### Répartition dans les clusters selon la taxonomie
|
|
```{r pirho_taxonomy, echo = FALSE, warning=FALSE}
|
|
pirho_taxonomy <- taxonomy_in_clusters(pirho_unlist)
|
|
|
|
pirho_taxonomy_long <- map_dfr(pirho_taxonomy,
|
|
function(collection) {
|
|
map_dfr(
|
|
c("insects", "plants"),
|
|
function(group) {
|
|
get_formatted_data(collection, group)
|
|
}
|
|
)
|
|
},
|
|
.id = "Collection"
|
|
)
|
|
```
|
|
|
|
```{r pirho_plot_taxonomy_pollinators, echo = FALSE, message = FALSE, fig.cap = 'Pollinators repartition for the pirho model regarding taxonomy', warning=FALSE}
|
|
# Pollinators
|
|
taxonomy_plot(
|
|
data = pirho_taxonomy_long,
|
|
insects_or_plants = "insects",
|
|
stack_or_fill = "stack", model = "pirho"
|
|
)
|
|
taxonomy_plot(
|
|
data = pirho_taxonomy_long,
|
|
insects_or_plants = "insects",
|
|
stack_or_fill = "fill", model = "pirho"
|
|
)
|
|
```
|
|
|
|
```{r pirho_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the pirho model regarding taxonomy', warning=FALSE}
|
|
taxonomy_plot(
|
|
data = pirho_taxonomy_long,
|
|
insects_or_plants = "plants",
|
|
stack_or_fill = "stack", model = "pirho"
|
|
)
|
|
taxonomy_plot(
|
|
data = pirho_taxonomy_long,
|
|
insects_or_plants = "plants",
|
|
stack_or_fill = "fill", model = "pirho"
|
|
)
|
|
```
|