mia-rapport-2024/Rcodes/real_data/presentation_dore.Rmd
2024-06-28 10:49:49 +02:00

526 lines
16 KiB
Text

```{r, setup, include=FALSE, warning=FALSE}
knitr::opts_chunk$set(echo = FALSE)
```
```{r require_lib, echo = FALSE, include=FALSE, warning=FALSE}
require("tidyverse")
require("knitr")
require("colSBM")
source("temporary_plot.R")
```
```{r pretty_matrix_print, echo = FALSE, warning=FALSE}
# Define a generic method that transforms an object x in a LaTeX string
as_latex <- function(x, ...) {
UseMethod("as_latex", x)
}
# Define a class latex for LaTeX expressions
as_latex.character <- function(x) {
structure(
paste(x, collapse = " "),
class = c("latex", "character")
)
}
# A character string of class latex is rendered in display mode
# Define a knit_print() method for the latex class
knit_print.latex <- function(x, ...) {
knitr::asis_output(
paste0("$$", x, "$$")
)
}
# Now, define a method as_latex for matrix
as_latex.matrix <- function(x, ...) {
as_latex(c(
"\\begin{pmatrix}",
paste(
t(x),
rep(c(rep("&", nrow(x) - 1), "\\\\"), ncol(x)),
collapse = ""
),
"\\end{pmatrix}"
))
}
# Indicate to knitr that matrix are rendered as latex
knit_print.matrix <- function(x, ...) {
knitr::knit_print(as_latex(round(x, 2)))
}
```
```{r better_collection_extraction, echo = FALSE, warning=FALSE}
extract_unlist_reorder <- function(clustering_data_path) {
clustering <- readRDS(clustering_data_path)
if (!is.list(clustering)) {
clustering <- list(clustering)
}
best_partition <- extract_best_bipartite_partition(clustering)
best_partition_unlist <- unlist(best_partition)
if (!is.list(best_partition_unlist)) {
best_partition_unlist <- list(best_partition_unlist)
}
size <- length(best_partition_unlist)
return(setNames(lapply(best_partition_unlist, function(collection) {
reorder_parameters(collection)
}), paste(rep("Collection", size), seq_len(size))))
}
meso_print <- function(unlisted_partition) {
for (idx in seq_along(unlisted_partition)) {
cat(paste("\\subsubsection{Pour la collection", idx, "}"))
print(plot(unlisted_partition[[idx]], type = "meso", mixture = TRUE))
cat("\\newline \\tiny")
print(knitr::kable(unlisted_partition[[idx]]$net_id,
col.names = "Networks",
format = "latex",
position = "!h",
booktabs = TRUE
))
cat("\\normalsize\\newline")
cat(knitr::knit_print(unlisted_partition[[idx]]$alpha))
}
}
alpha_print <- function(unlisted_partition) {
for (idx in seq_along(unlisted_partition)) {
cat(paste("\nPour la collection ", idx, ":\n"))
cat(knitr::knit_print(unlisted_partition[[idx]]$alpha))
}
}
```
```{r taxonomy_functions, echo = FALSE, warning=FALSE}
interaction_data <- read.table(file = "data/interaction-data.txt", sep = "\t", header = TRUE)
insect_orders <- unique(interaction_data$insectorder)
plant_family <- unique(interaction_data$plantorder)
insect_orders[is.na(insect_orders)] <- "NA"
plant_family[is.na(plant_family)] <- "NA"
### Matching taxonomy
taxonomy_in_clusters <- function(unlisted_model) {
if (is.list(unlisted_model)) {
lapply(seq_len(length(unlisted_model)), function(col_idx) {
# Per collection
# Empty init
insect_count <- t(sapply(insect_orders, function(order) {
out_count <- rep(0, unlisted_model[[col_idx]]$Q[2])
out_count
}))
plant_count <- t(sapply(plant_family, function(order) {
out_count <- rep(0, unlisted_model[[col_idx]]$Q[1])
out_count
}))
for (m in seq.int(unlisted_model[[col_idx]]$M)) {
#### Insect
insect_names <- names(unlisted_model[[col_idx]]$Z[[1]][[2]])
insect_count <- insect_count + t(sapply(insect_orders, function(order) {
out_count <- rep(0, unlisted_model[[col_idx]]$Q[2])
names(out_count) <- seq.int(unlisted_model[[col_idx]]$Q[2])
insect_count <- table(unlisted_model[[col_idx]]$Z[[m]][[2]][grep(order, insect_names)])
out_count[names(insect_count)] <- insect_count
out_count
}))
#### Plants
plant_names <- names(unlisted_model[[col_idx]]$Z[[1]][[1]])
plant_count <- t(sapply(plant_family, function(order) {
out_count <- rep(0, unlisted_model[[col_idx]]$Q[1])
names(out_count) <- seq.int(unlisted_model[[col_idx]]$Q[1])
plant_count <- table(unlisted_model[[col_idx]]$Z[[m]][[1]][grep(order, plant_names)])
out_count[names(plant_count)] <- plant_count
out_count
}))
}
return(list(insects = insect_count, plants = plant_count))
})
} else {
# Per collection
# Empty init
insect_count <- t(sapply(insect_orders, function(order) {
out_count <- rep(0, unlisted_model$Q[2])
out_count
}))
plant_count <- t(sapply(plant_family, function(order) {
out_count <- rep(0, unlisted_model$Q[1])
out_count
}))
for (m in seq.int(unlisted_model$M)) {
#### Insect
insect_names <- names(unlisted_model$Z[[1]][[2]])
insect_count <- insect_count + t(sapply(insect_orders, function(order) {
out_count <- rep(0, unlisted_model$Q[2])
names(out_count) <- seq.int(unlisted_model$Q[2])
insect_count <- table(unlisted_model$Z[[m]][[2]][grep(order, insect_names)])
out_count[names(insect_count)] <- insect_count
out_count
}))
#### Plants
plant_names <- names(unlisted_model$Z[[1]][[1]])
plant_count <- t(sapply(plant_family, function(order) {
out_count <- rep(0, unlisted_model$Q[1])
names(out_count) <- seq.int(unlisted_model$Q[1])
plant_count <- table(unlisted_model$Z[[m]][[1]][grep(order, plant_names)])
out_count[names(plant_count)] <- plant_count
out_count
}))
}
return(list(list(insects = insect_count, plants = plant_count)))
}
}
taxonomy_remove_empty <- function(taxonomy_collections_list) {
lapply(taxonomy_collections_list, function(collection) {
list(
insects = collection$insects[which(rowSums(collection$insects != 0) > 0), ],
plants = collection$plants[which(rowSums(collection$plants != 0) > 0), ]
)
})
}
get_formatted_data <- function(collection, group, max_rank = 6) {
collection[[group]] %>%
as.data.frame() %>% # Transformation en data frame
mutate(
Total = rowSums(.),
Rang = rank(-Total, ties.method = "min")
) %>% # Creation d'une colonne Total
rownames_to_column(var = "TaxonBrut") %>%
mutate(Taxon = ifelse(Rang <= max_rank & Total > 0, TaxonBrut, "Other")) %>%
arrange(Rang) %>%
mutate(Taxon = factor(Taxon, levels = unique(Taxon))) %>%
select(-Total, -TaxonBrut, -Rang) %>%
pivot_longer(
cols = -c("Taxon"),
names_to = "Bloc",
values_to = "Nombre",
names_prefix = "V"
) %>%
group_by(Taxon, Bloc) %>%
summarise_all(sum) %>%
ungroup() %>%
group_by(Bloc) %>%
mutate(Proportion = Nombre / sum(Nombre)) %>%
ungroup() %>%
mutate(Group = group)
}
taxonomy_plot <- function(data, insects_or_plants, model, stack_or_fill) {
plots <- filter(data, Group == insects_or_plants) %>%
ggplot(aes(x = Bloc, y = Nombre, fill = Taxon)) +
geom_bar(stat = "identity", position = stack_or_fill) +
labs(x = "Block", y = "Number of Nodes", fill = "Taxonomy") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
ggtitle(paste(
ifelse(insects_or_plants == "insects",
"Pollinators", "Plants"
), "repartition (",
ifelse(stack_or_fill == "stack", "absolute", "proportion"),
") in the", model, "clustering"
)) +
facet_wrap(~Collection, ncol = 3, scales = "free_x")
# Arrange the plots in a grid layout
gridExtra::grid.arrange(plots, newpage = TRUE)
}
```
```{r load data, echo = FALSE, include = FALSE, warning=FALSE}
# All results
iid_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_iid_123networks_24-05-23-21:40:42.Rds")
rho_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_rho_123networks_25-05-23-13:58:30.Rds")
pi_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_pi_123networks_25-05-23-17:31:25.Rds")
pirho_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_pirho_123networks_26-05-23-19:22:55.Rds")
```
# Application to \cite{doreRelativeEffectsAnthropogenic2021} data
\label{sec:application-to-dorerelativeeffectsanthropogenic2021-data}
## Clustering with model iid
With the *iid-colBiSBM* we obtain `r length(iid_unlist)` collections with the
following structures:
```{r iid_meso_plot, echo = FALSE, message=FALSE, results="asis", warning=FALSE}
#| fig.cap=paste(names(iid_unlist), rep("- iid",length(iid_unlist))),
#| fig.asp = 0.5,
#| dpi = 300
meso_print(iid_unlist)
```
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
```{r iid_alpha, echo = FALSE, results="asis", warning=FALSE}
alpha_print(iid_unlist)
```
### Comparaison avec des infos supplémentaires
```{r supinfo, echo = FALSE}
supinfo <- readxl::read_xlsx("data/supinfo.xlsx", sheet = 2)
interaction_data <- read.table(file = "data/interaction-data.txt", sep = "\t", header = TRUE)
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
incidence_matrices <- readRDS(file = "data/dore-matrices.Rds")
names_aggreg_networks <- names(incidence_matrices)
vectorClusteringNet <- numeric(nrow(supinfo))
for (k in 1:length(iid_unlist)) {
idclust <- match(iid_unlist[[k]]$net_id, names_aggreg_networks)
supinfoclust <- match(seq_ids_network_aggreg[idclust], supinfo$Idweb)
vectorClusteringNet[supinfoclust] <- k
}
```
```{r Annual_timespan_plot, echo = FALSE}
ggplot(supinfo) +
aes(
y = Annual_time_span,
x = vectorClusteringNet, group = vectorClusteringNet, fill = as.factor(vectorClusteringNet)
) +
xlab("Numéro de collection") +
ylab("Annual time span") +
guides(fill = guide_legend(title = "Numéro\nde collection")) +
geom_boxplot()
```
### Répartition dans les clusters selon la taxonomie
```{r iid_taxonomy, echo = FALSE, warning=FALSE}
iid_taxonomy <- taxonomy_in_clusters(iid_unlist)
iid_taxonomy_long <- map_dfr(iid_taxonomy,
function(collection) {
map_dfr(
c("insects", "plants"),
function(group) {
get_formatted_data(collection, group)
}
)
},
.id = "Collection"
)
```
```{r iid_plot_taxonomy_pollinators, echo = FALSE, message = FALSE,fig.cap = 'Pollinators repartition for the iid model regarding taxonomy', warning=FALSE}
# Pollinators
taxonomy_plot(
data = iid_taxonomy_long,
insects_or_plants = "insects",
stack_or_fill = "stack", model = "iid"
)
taxonomy_plot(
data = iid_taxonomy_long,
insects_or_plants = "insects",
stack_or_fill = "fill", model = "iid"
)
```
```{r iid_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the iid model regarding taxonomy', warning=FALSE}
taxonomy_plot(
data = iid_taxonomy_long,
insects_or_plants = "plants",
stack_or_fill = "stack", model = "iid"
)
taxonomy_plot(
data = iid_taxonomy_long,
insects_or_plants = "plants",
stack_or_fill = "fill", model = "iid"
)
```
#### Tables
```{r iid_taxo_tables, echo = FALSE}
iid_taxonomy_long %>%
filter(Group == "insects") %>%
group_by(Taxon) %>%
mutate(row_id = row_number()) %>%
pivot_wider(names_from = c(Collection, Bloc), values_from = Nombre, names_glue = "Collection_{Collection}_Bloc_{Bloc}") %>%
ungroup() %>%
select(-row_id) %>%
group_by(Taxon) %>%
summarize(across(starts_with("Collection"), ~ na.omit(.)[1])) %>%
ungroup()
```
## Clustering with model pi
Avec le modèle *pi* nous obtenons les `r length(pi_unlist)` collections et
les structures suivantes:
```{r pi_meso_plot, echo = FALSE, message=FALSE, fig.cap=paste(names(pi_unlist), rep("- pi",length(pi_unlist))), results="asis", warning=FALSE}
meso_print(pi_unlist)
```
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
```{r pi_alpha, echo = FALSE, results="asis", warning=FALSE}
alpha_print(pi_unlist)
```
### Répartition dans les clusters selon la taxonomie
```{r pi_taxonomy, echo = FALSE, warning=FALSE}
pi_taxonomy <- taxonomy_in_clusters(pi_unlist)
pi_taxonomy_long <- map_dfr(pi_taxonomy,
function(collection) {
map_dfr(
c("insects", "plants"),
function(group) {
get_formatted_data(collection, group)
}
)
},
.id = "Collection"
)
```
```{r pi_plot_taxonomy_pollinators, echo = FALSE, message = FALSE,fig.cap = 'Pollinators repartition for the pi model regarding taxonomy', warning=FALSE}
# Pollinators
taxonomy_plot(
data = pi_taxonomy_long,
insects_or_plants = "insects",
stack_or_fill = "stack", model = "pi"
)
taxonomy_plot(
data = pi_taxonomy_long,
insects_or_plants = "insects",
stack_or_fill = "fill", model = "pi"
)
```
```{r pi_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the pi model regarding taxonomy', warning=FALSE}
taxonomy_plot(
data = pi_taxonomy_long,
insects_or_plants = "plants",
stack_or_fill = "stack", model = "pi"
)
taxonomy_plot(
data = pi_taxonomy_long,
insects_or_plants = "plants",
stack_or_fill = "fill", model = "pi"
)
```
## Clustering with model rho
Avec le modèle *rho* nous obtenons les `r length(rho_unlist)` collections et
les structures suivantes:
```{r rho_meso_plot, echo = FALSE, message=FALSE, fig.cap=paste(names(rho_unlist), rep("- rho",length(rho_unlist))), results="asis", warning=FALSE}
meso_print(rho_unlist)
```
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
```{r rho_alpha, echo = FALSE, results="asis", warning=FALSE}
alpha_print(rho_unlist)
```
### Répartition dans les clusters selon la taxonomie
```{r rho_taxonomy, echo = FALSE, warning=FALSE}
rho_taxonomy <- taxonomy_in_clusters(rho_unlist)
rho_taxonomy_long <- map_dfr(rho_taxonomy,
function(collection) {
map_dfr(
c("insects", "plants"),
function(group) {
get_formatted_data(collection, group)
}
)
},
.id = "Collection"
)
```
```{r rho_plot_taxonomy_pollinators, echo = FALSE, message = FALSE,fig.cap = 'Pollinators repartition for the rho model regarding taxonomy', warning=FALSE}
# Pollinators
taxonomy_plot(
data = rho_taxonomy_long,
insects_or_plants = "insects",
stack_or_fill = "stack", model = "rho"
)
taxonomy_plot(
data = rho_taxonomy_long,
insects_or_plants = "insects",
stack_or_fill = "fill", model = "rho"
)
```
```{r rho_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the rho model regarding taxonomy', warning=FALSE}
taxonomy_plot(
data = rho_taxonomy_long,
insects_or_plants = "plants",
stack_or_fill = "stack", model = "rho"
)
taxonomy_plot(
data = rho_taxonomy_long,
insects_or_plants = "plants",
stack_or_fill = "fill", model = "rho"
)
```
## Clustering with model pirho
Avec le modèle *pirho* nous obtenons les `r length(pirho_unlist)` collections et
les structures suivantes:
```{r pirho_meso_plot, echo = FALSE, message=FALSE, fig.cap=paste(names(pirho_unlist), rep("- pirho",length(pirho_unlist))), results="asis", warning=FALSE}
meso_print(pirho_unlist)
```
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
```{r pirho_alpha, echo = FALSE, results="asis", warning=FALSE}
alpha_print(pirho_unlist)
```
### Répartition dans les clusters selon la taxonomie
```{r pirho_taxonomy, echo = FALSE, warning=FALSE}
pirho_taxonomy <- taxonomy_in_clusters(pirho_unlist)
pirho_taxonomy_long <- map_dfr(pirho_taxonomy,
function(collection) {
map_dfr(
c("insects", "plants"),
function(group) {
get_formatted_data(collection, group)
}
)
},
.id = "Collection"
)
```
```{r pirho_plot_taxonomy_pollinators, echo = FALSE, message = FALSE, fig.cap = 'Pollinators repartition for the pirho model regarding taxonomy', warning=FALSE}
# Pollinators
taxonomy_plot(
data = pirho_taxonomy_long,
insects_or_plants = "insects",
stack_or_fill = "stack", model = "pirho"
)
taxonomy_plot(
data = pirho_taxonomy_long,
insects_or_plants = "insects",
stack_or_fill = "fill", model = "pirho"
)
```
```{r pirho_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the pirho model regarding taxonomy', warning=FALSE}
taxonomy_plot(
data = pirho_taxonomy_long,
insects_or_plants = "plants",
stack_or_fill = "stack", model = "pirho"
)
taxonomy_plot(
data = pirho_taxonomy_long,
insects_or_plants = "plants",
stack_or_fill = "fill", model = "pirho"
)
```