Adding new files to the report

2023-07-03 15:10:39 +02:00 · 2023-07-03 15:10:39 +02:00 · 7b898bc258
commit 7b898bc258
parent a11c85f09c
56 changed files with 1143 additions and 1515 deletions
--- a/Rcodes/real_data/CoOPLBM_completion_analyze.Rmd
+++ b/Rcodes/real_data/CoOPLBM_completion_analyze.Rmd
@ -1,17 +1,11 @@
 ---
-title: "Netclustering analysis with the CoOPLBM completion"
-bibliography: references.bib
-suppress-bibliography: true
 output: 
-    html_document:
-        toc: true
-        theme: journal
-    pdf_document:
-        keep_tex: true
+  md_document:
+    citation_package: biblatex
 ---

 ```{r libraries, echo = FALSE, include=FALSE}
-devtools::load_all()
+require("colSBM")
 require(aricode)
 ```

@ -77,10 +71,10 @@ extract_full_reorder <- function(model_collections_list, target) {
 ```{r data_importation, echo = FALSE}
 # Uncompleted
 uncompleted_model_list <- list(
-  "iid" = extract_unlist("real_data/data/dore_uncompleted_collection_clustering_nb_run_1_iid_70_networks_08-06-23-16:31:17.Rds"),
-  "pi" = extract_unlist("real_data/data/dore_uncompleted_collection_clustering_nb_run_1_pi_70_networks_08-06-23-16:52:16.Rds"),
-  "rho" = extract_unlist("real_data/data/dore_uncompleted_collection_clustering_nb_run_1_rho_70_networks_08-06-23-16:49:58.Rds"),
-  "pirho" = extract_unlist("real_data/data/dore_uncompleted_collection_clustering_nb_run_1_pirho_70_networks_08-06-23-16:41:33.Rds")
+  "iid" = extract_unlist("data/dore_uncompleted_collection_clustering_nb_run_1_iid_70_networks_08-06-23-16:31:17.Rds"),
+  "pi" = extract_unlist("data/dore_uncompleted_collection_clustering_nb_run_1_pi_70_networks_08-06-23-16:52:16.Rds"),
+  "rho" = extract_unlist("data/dore_uncompleted_collection_clustering_nb_run_1_rho_70_networks_08-06-23-16:49:58.Rds"),
+  "pirho" = extract_unlist("data/dore_uncompleted_collection_clustering_nb_run_1_pirho_70_networks_08-06-23-16:41:33.Rds")
 )

 # Below we will need to have the netid in the same order so we choose to use the
@ -92,38 +86,38 @@ uncompleted_clusterings <- extract_full_reorder(uncompleted_model_list, netid_or

 # 0.2 threshold
 point_2_model_list <- list(
-  "iid" = extract_unlist("real_data/data/dore_point_2_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-18:40:10.Rds"),
-  "pi" = extract_unlist("real_data/data/dore_point_2_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-19:22:19.Rds"),
-  "rho" = extract_unlist("real_data/data/dore_point_2_completed_collection_clustering_nb_run_1_rho_70_networks_07-06-23-20:03:53.Rds"),
-  "pirho" = extract_unlist("real_data/data/dore_point_2_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-21:09:12.Rds")
+  "iid" = extract_unlist("data/dore_point_2_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-18:40:10.Rds"),
+  "pi" = extract_unlist("data/dore_point_2_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-19:22:19.Rds"),
+  "rho" = extract_unlist("data/dore_point_2_completed_collection_clustering_nb_run_1_rho_70_networks_07-06-23-20:03:53.Rds"),
+  "pirho" = extract_unlist("data/dore_point_2_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-21:09:12.Rds")
 )
 point_2_clusterings <- extract_full_reorder(point_2_model_list, netid_order)

 # 0.5 threshold
 point_5_model_list <- list(
-  "iid" = extract_unlist("real_data/data/dore_point_5_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-19:19:53.Rds"),
-  "pi" = extract_unlist("real_data/data/dore_point_5_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-21:31:20.Rds"),
-  "rho" = extract_unlist("real_data/data/dore_point_5_completed_collection_clustering_nb_run_1_rho_70_networks_07-06-23-21:03:50.Rds"),
-  "pirho" = extract_unlist("real_data/data/dore_point_5_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-21:13:10.Rds")
+  "iid" = extract_unlist("data/dore_point_5_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-19:19:53.Rds"),
+  "pi" = extract_unlist("data/dore_point_5_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-21:31:20.Rds"),
+  "rho" = extract_unlist("data/dore_point_5_completed_collection_clustering_nb_run_1_rho_70_networks_07-06-23-21:03:50.Rds"),
+  "pirho" = extract_unlist("data/dore_point_5_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-21:13:10.Rds")
 )
 point_5_clusterings <- extract_full_reorder(point_5_model_list, netid_order)

 # Uniform re-sampled
 random_model_list <- list(
-  "iid" = extract_unlist("real_data/data/dore_random_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-21:44:14.Rds"),
-  "pi" = extract_unlist("real_data/data/dore_random_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-22:52:47.Rds"),
-  "rho" = extract_unlist("real_data/data/dore_random_completed_collection_clustering_nb_run_1_rho_70_networks_08-06-23-18:16:04.Rds"),
-  "pirho" = extract_unlist("real_data/data/dore_random_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-23:07:08.Rds")
+  "iid" = extract_unlist("data/dore_random_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-21:44:14.Rds"),
+  "pi" = extract_unlist("data/dore_random_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-22:52:47.Rds"),
+  "rho" = extract_unlist("data/dore_random_completed_collection_clustering_nb_run_1_rho_70_networks_08-06-23-18:16:04.Rds"),
+  "pirho" = extract_unlist("data/dore_random_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-23:07:08.Rds")
 )
 random_clusterings <- extract_full_reorder(random_model_list, netid_order)
 ```

-# Context of this analysis
+### Context of this analysis

 After performing a netclustering on the raw data, we will see if the detect
 structure resulting in the clustering comes from the sampling effort. To test
 this we will use the CoOPLBM model by
-@anakokDisentanglingStructureEcological2022 to complete the data.
+\cite{anakokDisentanglingStructureEcological2022} to complete the data.

 The CoOPLBM model assumes that the observed incidence matrix $R$ is an
 element-wise product of an $M$ matrix following an LBM and an $N$ matrix which
@ -141,7 +135,7 @@ Note that if $R_{ij} = 1$ then $\widehat{M_{ij}} = 1$
 This *completed matrix* can be used in different manners to be fed to the colSBM
 model.

-# Threshold based completions
+### Threshold based completions
 With the thresholds, the infered incidence matrix obtained by
 CoOPLBM is used to generate a completed incidence matrix by the following
 procedure :
@ -150,7 +144,7 @@ $$X_{ij} = \begin{cases}
  0 & \text{else} \\
 \end{cases}$$

-## 0.5 completed threshold
+#### 0.5 completed threshold
 ```{r useful-functions, echo = FALSE, include=FALSE}
 ARI_netclustering_models <- function(
    clustering_compare,
@ -183,11 +177,11 @@ knitr::kable(ARI_netclustering_models(point_5_clusterings),
 In the above table, one can see the network clustering obtained after applying
 CoOPLBM has not much in common with the clustering of the uncompleted data.

-### Number of sub-collections and details of each sub-collection
+##### Number of sub-collections and details of each sub-collection
 ```{r 0.5_partition_numbers, echo = FALSE}
 ```

-## 0.2 completed threshold
+### 0.2 completed threshold

 The $0.2$ threshold adds a lot of interactions compared to raw matrix.

@ -197,7 +191,10 @@ knitr::kable(ARI_netclustering_models(point_2_clusterings),
 )
 ```

-# Sample based completions
+As with the $0.5$ threshold, the clustering obtained after applying CoOPLBM
+does not match the clustering of the uncompleted data.
+
+### Sample based completions

 The $M$ matrix is used to sample a new $X$ matrix which elements are the
 realisation of Bernoulli distributions of probability $M_{i,j}$.
--- a/Rcodes/real_data/CoOPLBM_completion_analyze.tex
+++ b/Rcodes/real_data/CoOPLBM_completion_analyze.tex
@ -1,90 +1,22 @@
-% Options for packages loaded elsewhere
-\PassOptionsToPackage{unicode}{hyperref}
-\PassOptionsToPackage{hyphens}{url}
-%
-\documentclass[
-]{article}
-\usepackage{lmodern}
-\usepackage{amssymb,amsmath}
-\usepackage{ifxetex,ifluatex}
-\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
-  \usepackage[T1]{fontenc}
-  \usepackage[utf8]{inputenc}
-  \usepackage{textcomp} % provide euro and other symbols
-\else % if luatex or xetex
-  \usepackage{unicode-math}
-  \defaultfontfeatures{Scale=MatchLowercase}
-  \defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1}
-\fi
-% Use upquote if available, for straight quotes in verbatim environments
-\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
-\IfFileExists{microtype.sty}{% use microtype if available
-  \usepackage[]{microtype}
-  \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
-}{}
-\makeatletter
-\@ifundefined{KOMAClassName}{% if non-KOMA class
-  \IfFileExists{parskip.sty}{%
-    \usepackage{parskip}
-  }{% else
-    \setlength{\parindent}{0pt}
-    \setlength{\parskip}{6pt plus 2pt minus 1pt}}
-}{% if KOMA class
-  \KOMAoptions{parskip=half}}
-\makeatother
-\usepackage{xcolor}
-\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available
-\IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}}
-\hypersetup{
-  pdftitle={Netclustering analysis with the CoOPLBM completion},
-  hidelinks,
-  pdfcreator={LaTeX via pandoc}}
-\urlstyle{same} % disable monospaced font for URLs
-\usepackage[margin=1in]{geometry}
-\usepackage{graphicx}
-\makeatletter
-\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
-\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
-\makeatother
-% Scale images if necessary, so that they will not overflow the page
-% margins by default, and it is still possible to overwrite the defaults
-% using explicit options in \includegraphics[width, height, ...]{}
-\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
-% Set default figure placement to htbp
-\makeatletter
-\def\fps@figure{htbp}
-\makeatother
-\setlength{\emergencystretch}{3em} % prevent overfull lines
-\providecommand{\tightlist}{%
-  \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
-\setcounter{secnumdepth}{-\maxdimen} % remove section numbering
-\newlength{\cslhangindent}
-\setlength{\cslhangindent}{1.5em}
-\newenvironment{cslreferences}%
-  {\setlength{\parindent}{0pt}%
-  \everypar{\setlength{\hangindent}{\cslhangindent}}\ignorespaces}%
-  {\par}
-
-\title{Netclustering analysis with the CoOPLBM completion}
-\author{}
-\date{\vspace{-2.5em}}
-
-\begin{document}
-\maketitle
-
 \hypertarget{context-of-this-analysis}{%
-\section{Context of this analysis}\label{context-of-this-analysis}}
+\subsubsection{Context of this
+analysis}\label{context-of-this-analysis}}

 After performing a netclustering on the raw data, we will see if the
 detect structure resulting in the clustering comes from the sampling
-effort. To test this we will use the CoOPLBM model by Anakok et al.
-(2022) to complete the data.
+effort. To test this we will use the CoOPLBM model by
+\cite{anakokDisentanglingStructureEcological2022} to complete the data.

 The CoOPLBM model assumes that the observed incidence matrix \(R\) is an
 element-wise product of an \(M\) matrix following an LBM and an \(N\)
 matrix which elements follow Poisson distributions independent on \(M\).

-The model gives us the \(\hat{M}\) matrix, which elements are:
+The model gives us the \(\widehat{M}\) matrix, the elements of which
+are:
+
+\[\widehat{M_{ij}} = \mathbb{P}(M_{ij} = 1)\]
+
+Note that if \(R_{ij} = 1\) then \(\widehat{M_{ij}} = 1\)

 \begin{itemize}
 \tightlist
@ -99,7 +31,7 @@ This \emph{completed matrix} can be used in different manners to be fed
 to the colSBM model.

 \hypertarget{threshold-based-completions}{%
-\section{Threshold based
+\subsubsection{Threshold based
 completions}\label{threshold-based-completions}}

 With the thresholds, the infered incidence matrix obtained by CoOPLBM is
@ -110,32 +42,69 @@ used to generate a completed incidence matrix by the following procedure
 \end{cases}\]

 \hypertarget{completed-threshold}{%
-\subsection{0.5 completed threshold}\label{completed-threshold}}
+\paragraph{0.5 completed threshold}\label{completed-threshold}}

 Here, the completion threshold is set to \(0.5\).

-\hypertarget{ari-of-networks-clustering-0.5-threshold-vs-raw-data}{%
-\subsubsection{ARI of networks clustering: 0.5 threshold vs raw
-data}\label{ari-of-networks-clustering-0.5-threshold-vs-raw-data}}
+First we will compute an ARI on the collection id given by the raw data
+and the completed matrix.
+
+\begin{longtable}[]{@{}lr@{}}
+\toprule
+& ARI with uncompleted data\tabularnewline
+\midrule
+\endhead
+iid & 0.1142823\tabularnewline
+pi & 0.0263660\tabularnewline
+rho & 0.0933340\tabularnewline
+pirho & 0.2158747\tabularnewline
+\bottomrule
+\end{longtable}
+
+In the above table, one can see the network clustering obtained after
+applying CoOPLBM has not much in common with the clustering of the
+uncompleted data.
+
+\hypertarget{number-of-sub-collections-and-details-of-each-sub-collection}{%
+\subparagraph{Number of sub-collections and details of each
+sub-collection}\label{number-of-sub-collections-and-details-of-each-sub-collection}}
+
+\hypertarget{completed-threshold-1}{%
+\subsubsection{0.2 completed threshold}\label{completed-threshold-1}}
+
+The \(0.2\) threshold adds a lot of interactions compared to raw matrix.
+
+\begin{longtable}[]{@{}lr@{}}
+\toprule
+& ARI with uncompleted data\tabularnewline
+\midrule
+\endhead
+iid & 0.0429465\tabularnewline
+pi & 0.0330057\tabularnewline
+rho & 0.0187305\tabularnewline
+pirho & 0.0357728\tabularnewline
+\bottomrule
+\end{longtable}
+
+Same as for \(0.5\), after applying CoOPLBM the obtained clustering
+doesn't match the uncompleted data.

 \hypertarget{sample-based-completions}{%
-\section{Sample based completions}\label{sample-based-completions}}
+\subsubsection{Sample based
+completions}\label{sample-based-completions}}

 The \(M\) matrix is used to sample a new \(X\) matrix which elements are
 the realisation of Bernoulli distributions of probability \(M_{i,j}\).
 \[\mathbb{P}(X_{i,j} = 1) = M_{i,j} \]

-\hypertarget{references}{%
-\section*{References}\label{references}}
-\addcontentsline{toc}{section}{References}
-
-\hypertarget{refs}{}
-\begin{cslreferences}
-\leavevmode\hypertarget{ref-anakokDisentanglingStructureEcological2022}{}%
-Anakok, Emre, Pierre Barbillon, Colin Fontaine, and Elisa Thebault.
-2022. ``Disentangling the Structure of Ecological Bipartite Networks
-from Observation Processes.'' arXiv.
-\url{http://arxiv.org/abs/2211.16364}.
-\end{cslreferences}
-
-\end{document}
+\begin{longtable}[]{@{}lr@{}}
+\toprule
+& ARI with uncompleted data\tabularnewline
+\midrule
+\endhead
+iid & 0.0148172\tabularnewline
+pi & 0.0265793\tabularnewline
+rho & 0.0051536\tabularnewline
+pirho & 0.0152299\tabularnewline
+\bottomrule
+\end{longtable}
--- a/Rcodes/real_data/application_dore_data.Rmd
+++ b/Rcodes/real_data/application_dore_data.Rmd
@ -0,0 +1,2 @@
+# Application to \cite{doreRelativeEffectsAnthropogenic2021} data
+\label{sec:application-to-dorerelativeeffectsanthropogenic2021-data}
--- a/Rcodes/real_data/application_dore_data.tex
+++ b/Rcodes/real_data/application_dore_data.tex
@ -0,0 +1,6 @@
+\hypertarget{application-to-data}{%
+\section{\texorpdfstring{Application to
+\cite{doreRelativeEffectsAnthropogenic2021}
+data}{Application to  data}}\label{application-to-data}}
+
+\label{sec:application-to-dorerelativeeffectsanthropogenic2021-data}
--- a/Rcodes/real_data/presentation_dore.Rmd
+++ b/Rcodes/real_data/presentation_dore.Rmd
@ -18,8 +18,8 @@ options(knitr.table.format = function() {
 ```{r require_lib, echo = FALSE, include=FALSE, warning=FALSE}
 require("tidyverse")
 require("knitr")
-devtools::load_all("R/")
-source("real_data/temporary_plot.R")
+require("colSBM")
+source("temporary_plot.R")
 ```

 ```{r pretty_matrix_print, echo = FALSE, warning=FALSE}
@ -98,7 +98,7 @@ alpha_print <- function(unlisted_partition) {
 ```

 ```{r taxonomy_functions, echo = FALSE, warning=FALSE}
-interaction_data <- read.table(file = "real_data/data/interaction-data.txt", sep = "\t", header = TRUE)
+interaction_data <- read.table(file = "data/interaction-data.txt", sep = "\t", header = TRUE)

 insect_orders <- unique(interaction_data$insectorder)
 plant_family <- unique(interaction_data$plantorder)
@ -243,13 +243,13 @@ taxonomy_plot <- function(data, insects_or_plants, model, stack_or_fill) {

 ```{r load data, echo = FALSE, include = FALSE, warning=FALSE}
 # All results
-iid_unlist <- extract_unlist_reorder("real_data/data/dore_collection_clustering_nb_run1_iid_123networks_24-05-23-21:40:42.Rds")
+iid_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_iid_123networks_24-05-23-21:40:42.Rds")

-rho_unlist <- extract_unlist_reorder("real_data/data/dore_collection_clustering_nb_run1_rho_123networks_25-05-23-13:58:30.Rds")
+rho_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_rho_123networks_25-05-23-13:58:30.Rds")

-pi_unlist <- extract_unlist_reorder("real_data/data/dore_collection_clustering_nb_run1_pi_123networks_25-05-23-17:31:25.Rds")
+pi_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_pi_123networks_25-05-23-17:31:25.Rds")

-pirho_unlist <- extract_unlist_reorder("real_data/data/dore_collection_clustering_nb_run1_pirho_123networks_26-05-23-19:22:55.Rds")
+pirho_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_pirho_123networks_26-05-23-19:22:55.Rds")
 ```

 ## Clustering avec le modèle iid
@ -267,11 +267,11 @@ alpha_print(iid_unlist)
 ```
 ### Comparaison avec des infos supplémentaires
 ```{r supinfo, echo = FALSE}
-supinfo <- readxl::read_xlsx("real_data/data/supinfo.xlsx", sheet = 2)
-interaction_data <- read.table(file = "real_data/data/interaction-data.txt", sep = "\t", header = TRUE)
+supinfo <- readxl::read_xlsx("data/supinfo.xlsx", sheet = 2)
+interaction_data <- read.table(file = "data/interaction-data.txt", sep = "\t", header = TRUE)

 seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
-incidence_matrices <- readRDS(file = "real_data/data/dore-matrices.Rds")
+incidence_matrices <- readRDS(file = "data/dore-matrices.Rds")
 names_aggreg_networks <- names(incidence_matrices)
 vectorClusteringNet <- numeric(nrow(supinfo))
 for (k in 1:length(iid_unlist)) {
--- a/Rcodes/real_data/presentation_dore.tex
+++ b/Rcodes/real_data/presentation_dore.tex
--- a/Rcodes/simulation/inference_analyze.R
+++ b/Rcodes/simulation/inference_analyze.R
@ -1,6 +1,6 @@
 require("ggplot2")
 filenames <- list.files(
-    path = "./simulation/data/",
+    path = "./Rcodes/simulation/data/",
    pattern = "inference_testing_2023-05*",
    full.names = TRUE
 )
--- a/Rcodes/simulation/inference_analyze.Rmd
+++ b/Rcodes/simulation/inference_analyze.Rmd
--- a/Rcodes/simulation/inference_check.R
+++ b/Rcodes/simulation/inference_check.R
@ -1,7 +1,7 @@
 require("bettermc")
 require("gtools")
 require("tictoc")
-devtools::load_all("R/")
+require("colSBM")

 # Network param
 nr <- 120
@ -16,13 +16,13 @@ base_pi1 <- c(0.2, 0.4, 0.4, 0)
 rho1 <- rep(0.25, 4)

 pi2 <- rep(0.25, 4)
-base_rho2 <- c(0, 1/3, 1/3, 1/3)
+base_rho2 <- c(0, 1 / 3, 1 / 3, 1 / 3)

 pi1 <- matrix(unlist(combinat::permn(base_pi1)), byrow = TRUE, ncol = 4)
-pi1 <- pi1[!duplicated(pi1), ][1:4, ]
+pi1 <- pi1[!duplicated(pi1), ]

 rho2 <- matrix(unlist(combinat::permn(base_rho2)), byrow = TRUE, ncol = 4)
-rho2 <- rho2[!duplicated(rho2),]
+rho2 <- rho2[!duplicated(rho2), ]

 repetition <- seq.int(3)

@ -30,8 +30,10 @@ conditions <- tidyr::crossing(epsilon_alpha, pi1, rho2, repetition)

 # Filter conditions to prevent the same blocks from being empty
 conditions <- conditions[
-    !apply(conditions$pi1[, 1:4] == 0 & conditions$rho2[, 1:4] == 0, 
-    1, any),
+    !apply(
+        conditions$pi1[, 1:4] == 0 & conditions$rho2[, 1:4] == 0,
+        1, any
+    ),
 ]

 # To speed up computations and debug adding an argument based selection
@ -58,18 +60,20 @@ if (arg[2] > nrow(conditions) | arg[2] < 1) {

 choosed_conditions <- seq.int(from = arg[1], to = arg[2])

-conditions <- conditions[choosed_conditions,]
+conditions <- conditions[choosed_conditions, ]
 tic()
 results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
-    ea <- conditions[s,]$epsilon_alpha
+    ea <- conditions[s, ]$epsilon_alpha
    current_pi1 <- conditions[s, ]$pi1
-    current_rho2 <- conditions[s,]$rho2
+    current_rho2 <- conditions[s, ]$rho2

-    current_alpha <- base_alpha + matrix(c(
+    current_alpha <- base_alpha + matrix(
+        c(
            3 * ea, 2 * ea, ea, -ea,
-                            2 * ea, 2 * ea, - ea, ea,
-                            ea, - ea, ea, 2 * ea,
-                            - ea, ea, 2 * ea, 0),
+            2 * ea, 2 * ea, -ea, ea,
+            ea, -ea, ea, 2 * ea,
+            -ea, ea, 2 * ea, 0
+        ),
        byrow = TRUE, nrow = 4, ncol = 4
    )

@ -78,25 +82,25 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
    Cpi2 <- matrix(c(rho1, current_rho2), byrow = TRUE, nrow = M) > 0

    netlist_generated <- list(
-        generate_bipartite_network(
+        generate_bipartite_collection(
            nr, nc, conditions[s, ]$pi1, rho1,
-            current_alpha
-        ),
-        generate_bipartite_network(
+            current_alpha, M = 1, return_memberships = TRUE
+        )[[1]],
+        generate_bipartite_collection(
            nr, nc, pi2, conditions[s, ]$rho2,
-            current_alpha
-        )
+            current_alpha, M = 1, return_memberships = TRUE
+        )[[1]]
    )
    netlist <- lapply(seq_along(netlist_generated), function(m) {
        return(netlist_generated[[m]]$incidence_matrix)
    })

    row_clusterings <- lapply(seq_along(netlist_generated), function(m) {
-        return(netlist_generated[[m]]$row_clustering)
+        return(netlist_generated[[m]]$row_blockmemberships)
    })

    col_clusterings <- lapply(seq_along(netlist_generated), function(m) {
-        return(netlist_generated[[m]]$col_clustering)
+        return(netlist_generated[[m]]$col_blockmemberships)
    })

    full_row_clustering <- as.vector(sapply(
--- a/Rcodes/simulation/model_selection_analyze.R
+++ b/Rcodes/simulation/model_selection_analyze.R
@ -1,6 +1,6 @@
 require("ggplot2")
 filenames <- list.files(
-    path = "./simulation/data/",
+    path = "./Rcodes/simulation/data/",
    pattern = "model_selection_check_batch_15mai_3_rep_",
    full.names = TRUE
 )
--- a/figure/Annual_timespan_plot-1.png
+++ b/figure/Annual_timespan_plot-1.png
--- a/figure/iid_meso_plot-1.png
+++ b/figure/iid_meso_plot-1.png
--- a/figure/iid_meso_plot-2.png
+++ b/figure/iid_meso_plot-2.png
--- a/figure/iid_meso_plot-3.png
+++ b/figure/iid_meso_plot-3.png
--- a/figure/iid_meso_plot-4.png
+++ b/figure/iid_meso_plot-4.png
--- a/figure/iid_meso_plot-5.png
+++ b/figure/iid_meso_plot-5.png
--- a/figure/iid_plot_taxonomy_plants-1.png
+++ b/figure/iid_plot_taxonomy_plants-1.png
--- a/figure/iid_plot_taxonomy_plants-2.png
+++ b/figure/iid_plot_taxonomy_plants-2.png
--- a/figure/iid_plot_taxonomy_pollinators-1.png
+++ b/figure/iid_plot_taxonomy_pollinators-1.png
--- a/figure/iid_plot_taxonomy_pollinators-2.png
+++ b/figure/iid_plot_taxonomy_pollinators-2.png
--- a/figure/pi_meso_plot-1.png
+++ b/figure/pi_meso_plot-1.png
--- a/figure/pi_meso_plot-2.png
+++ b/figure/pi_meso_plot-2.png
--- a/figure/pi_plot_taxonomy_plants-1.png
+++ b/figure/pi_plot_taxonomy_plants-1.png
--- a/figure/pi_plot_taxonomy_plants-2.png
+++ b/figure/pi_plot_taxonomy_plants-2.png
--- a/figure/pi_plot_taxonomy_pollinators-1.png
+++ b/figure/pi_plot_taxonomy_pollinators-1.png
--- a/figure/pi_plot_taxonomy_pollinators-2.png
+++ b/figure/pi_plot_taxonomy_pollinators-2.png
--- a/figure/pirho_meso_plot-1.png
+++ b/figure/pirho_meso_plot-1.png
--- a/figure/pirho_meso_plot-10.png
+++ b/figure/pirho_meso_plot-10.png
--- a/figure/pirho_meso_plot-11.png
+++ b/figure/pirho_meso_plot-11.png
--- a/figure/pirho_meso_plot-12.png
+++ b/figure/pirho_meso_plot-12.png
--- a/figure/pirho_meso_plot-13.png
+++ b/figure/pirho_meso_plot-13.png
--- a/figure/pirho_meso_plot-14.png
+++ b/figure/pirho_meso_plot-14.png
--- a/figure/pirho_meso_plot-15.png
+++ b/figure/pirho_meso_plot-15.png
--- a/figure/pirho_meso_plot-2.png
+++ b/figure/pirho_meso_plot-2.png
--- a/figure/pirho_meso_plot-3.png
+++ b/figure/pirho_meso_plot-3.png
--- a/figure/pirho_meso_plot-4.png
+++ b/figure/pirho_meso_plot-4.png
--- a/figure/pirho_meso_plot-5.png
+++ b/figure/pirho_meso_plot-5.png
--- a/figure/pirho_meso_plot-6.png
+++ b/figure/pirho_meso_plot-6.png
--- a/figure/pirho_meso_plot-7.png
+++ b/figure/pirho_meso_plot-7.png
--- a/figure/pirho_meso_plot-8.png
+++ b/figure/pirho_meso_plot-8.png
--- a/figure/pirho_meso_plot-9.png
+++ b/figure/pirho_meso_plot-9.png
--- a/figure/pirho_plot_taxonomy_plants-1.png
+++ b/figure/pirho_plot_taxonomy_plants-1.png
--- a/figure/pirho_plot_taxonomy_plants-2.png
+++ b/figure/pirho_plot_taxonomy_plants-2.png
--- a/figure/pirho_plot_taxonomy_pollinators-1.png
+++ b/figure/pirho_plot_taxonomy_pollinators-1.png
--- a/figure/pirho_plot_taxonomy_pollinators-2.png
+++ b/figure/pirho_plot_taxonomy_pollinators-2.png
--- a/figure/rho_meso_plot-1.png
+++ b/figure/rho_meso_plot-1.png
--- a/figure/rho_plot_taxonomy_plants-1.png
+++ b/figure/rho_plot_taxonomy_plants-1.png
--- a/figure/rho_plot_taxonomy_plants-2.png
+++ b/figure/rho_plot_taxonomy_plants-2.png
--- a/figure/rho_plot_taxonomy_pollinators-1.png
+++ b/figure/rho_plot_taxonomy_pollinators-1.png
--- a/figure/rho_plot_taxonomy_pollinators-2.png
+++ b/figure/rho_plot_taxonomy_pollinators-2.png
--- a/img/Organigramme_MIA-Paris-Saclay.png
+++ b/img/Organigramme_MIA-Paris-Saclay.png
--- a/img/Organigramme_MIA-Paris-Saclay.svg
+++ b/img/Organigramme_MIA-Paris-Saclay.svg
--- a/presentation_UMR.tex
+++ b/presentation_UMR.tex
@ -0,0 +1,66 @@
+\section{Présentation}
+L'UMR MIA Paris-Saclay est une entité de recherche qui regroupe des
+statisticiens et des informaticiens spécialisés dans la modélisation et
+l'apprentissage statistique et informatique appliqués à la biologie, l'écologie,
+l'environnement, l'agronomie et l'agro-alimentaire. Elle est affiliée à
+AgroParisTech, INRAE et l'Université Paris Saclay.
+
+Les membres de cette unité possèdent des compétences variées en matière de
+méthodes d'inférence statistique, telles que les modèles complexes, les modèles
+à variables latentes, l'inférence bayésienne, l'apprentissage et la sélection de
+modèle. Ils sont également experts en algorithmique, notamment en
+généralisation, transfert de domaine et représentation des connaissances.
+
+L'objectif de cette unité est de développer des méthodes statistiques et
+informatiques originales, à la fois génériques et motivées par des
+problématiques spécifiques dans le domaine des sciences du vivant. Les activités
+de recherche s'appuient sur une solide culture dans les disciplines cibles,
+telles que l'écologie, l'environnement, l'agro-alimentaire, la biologie
+moléculaire et la biologie des systèmes.
+
+L'unité est structurée en deux équipes de recherche : SOLsTIS (Statistical
+mOdelling and Learning for environnemenT and lIfe Sciences) et EkINocs (Expert
+Knowledge, INteractive modellINg and learnINg for understandINg and decisiOn
+makINg in dINamic Complexe Systems).
+
+Elle est rattachée au département MATHNUM d'INRAE et au département MMIP
+d'AgroParisTech.
+
+Les responsables au sein de l'unité sont : Julien Chiquet en tant que Directeur
+d'unité, Sophie Donnet en tant que Directrice d'unité adjointe, Antoine
+Cornuéjols en tant que Responsable de l'équipe EkINocs, et Sophie Donnet
+et Pierre Barbillon en tant que Responsables de l'équipe SOLsTIS.
+\newline
+\emph{Source:~\cite{AccueilMIAParisSaclay}}\\
+La figure \ref{fig:organigramme-umr} présente l'organigramme complet de l'unité.
+
+\begin{sidewaysfigure}[h!]
+    \begin{center}
+        % \includegraphics[scale=0.4]{img/Organigramme_MIA-Paris-Saclay}
+        \includesvg[scale=0.6]{img/Organigramme_MIA-Paris-Saclay.svg}
+        \caption{Organigramme de l'UMR}
+        \label{fig:organigramme-umr}
+    \end{center}
+\end{sidewaysfigure}
+
+\section{Encadrement et vie en stage}
+
+Au cours de mon stage, j'étais encadré par Pierre Barbillon et fréquemment en
+discussion avec lui et Saint-Clair Chabert-Liddell dont j'ai poursuivi les
+travaux.
+
+Le contexte de travail, au sein des ingénieurs d'études, des doctorants, des
+chercheurs et des maîtres de conférences, a été pour moi très enrichissant. Ce
+stage s'inscrit dans la construction de mon parcours professionnel en validant
+le désir que je présentais de faire de la recherche.
+
+J'ai particulièrement apprécié la disponibilité de toutes les personnes de
+l'unité, qui n'ont jamais hésité à se rendre disponibles pour répondre à mes
+questions.
+Les nombreux séminaires et le désir de partage de connaissances à travers des
+formations internes et de l'auto-formation m'ont vraiment plu et m'ont ouvert à
+de nouvelles problématiques passionnantes.
+De plus j'ai beaucoup progressé dans les domaines abordés pendant mon
+stage, et cela m'a rendu confiant dans le choix de faire le
+master \emph{MathSV} pour l'année scolaire 2023-2024. Ce stage a donc été
+déterminant et confirme l'orientation de mon parcours professionnel.
--- a/rapport.pdf
+++ b/rapport.pdf
--- a/rapport.tex
+++ b/rapport.tex
@ -1,5 +1,4 @@
 \documentclass[12pt,a4paper]{report}
-
 %====En-tête====
 % Ajout des packages
 \usepackage[english]{babel} % pour dire que le texte est en francais
@ -18,11 +17,18 @@
 \usepackage{tikz} % For graph plots
 \usepackage{caption} % Figures
 \usepackage{subcaption} % And Subfigures
+\usepackage{longtable}
+\usepackage{rotating} % For allowing to rotate figures
+\usepackage{svg} % To allow svg inclusions

 %% Bibliography
 \usepackage[style=apa,citestyle=authoryear-comp]{biblatex}
 \addbibresource{references.bib}

+%% For good md to tex conversion
+\providecommand{\tightlist}{%
+  \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
+\usepackage{booktabs}

 %% Tikz Related
 \usetikzlibrary{calc,shapes,backgrounds,arrows,automata,shadows,positioning}
@ -62,14 +68,26 @@
 % titre et auteur
 \title{Rapport de stage dans l'UMR MIA Paris-Saclay}
 \author{Louis Lacoste}
-
+\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
 \begin{document}
 \maketitle
 \tableofcontents

-\chapter{Présentation de l'UMR}
+\section{Remerciements}

-\chapter{Context}
+Je tiens à remercier en premier lieu Pierre Barbillon pour son encadrement
+remarquable, sa disponibilité, ses conseils avisés et sa gentillesse. Je
+remercie également Saint-Clair Chabert-Liddell pour son accompagnement, ses
+remarques, ses explications et le temps qu'il m'a consacré. Merci à Sophie
+Donnet pour les cours et les idées qu'elle m'a donnés.
+
+% TODO Compléter les remerciements
+
+\chapter{L'UMR MIA Paris-Saclay}
+
+\include{presentation_UMR}
+
+\chapter{Context of the study}

 \section{Usage and importance of bipartite graphs}
 \label{sec:usage-and-importance-of-bipartite-graphs}
@ -253,6 +271,7 @@ This model supposes that:
 \end{figure}

 Parameters
+% TODO fix parameters according to presentation
 \begin{itemize}
    \item $Q_1 = \{{\color{blueind}\bullet},{\color{cyanind}\bullet},{\color{electricblue}\bullet}\}$ blocks in rows
    \item $Q_2 = \{{\color{burntorange}\bullet},{\color{goldenyellow}\bullet},{\color{yellow}\bullet}\}$ blocks in columns
@ -433,7 +452,7 @@ the column dimension.
 For a given number of blocks $Q_1$, $Q_2$ and matrix $S^2$ ($S^1$ being in this case the matrix full of ones), the number of
 parameters is:
 \begin{equation*}
-    \text{NP}(\pi\text{-}colBiSBM) = (Q_1 - 1) + \sum_{m=1}^{M}\Bigg( \sum_{r=1}^{Q_2} S^2_{mr} - 1 \Bigg) + \sum_{\substack{q=1,\dots,Q_1 \\ r=1,\dots,Q_2}} \mathbb{1}_{{(S^{1\prime}S^2)}_{qr}>0}
+    \text{NP}(\rho\text{-}colBiSBM) = (Q_1 - 1) + \sum_{m=1}^{M}\Bigg( \sum_{r=1}^{Q_2} S^2_{mr} - 1 \Bigg) + \sum_{\substack{q=1,\dots,Q_1 \\ r=1,\dots,Q_2}} \mathbb{1}_{{(S^{1\prime}S^2)}_{qr}>0}
 \end{equation*}

 $\pi\rho$-colBiSBM model still assumes that the networks share a common connectivity
@ -451,7 +470,7 @@ $\rho^m_r \in \left[ 0,1 \right], \sum_{r=1}^{Q_2} \rho^m_r = 1 $.
 For a given number of blocks $Q_1$, $Q_2$ and matrices $S^1$, $S^2$, the number of
 parameters is:
 \begin{equation*}
-    \text{NP}(\pi\text{-}colBiSBM) = \sum_{m=1}^{M}\Bigg( \sum_{q=1}^{Q_1} S^1_{mq} - 1 \Bigg) + \sum_{m=1}^{M}\Bigg( \sum_{r=1}^{Q_2} S^2_{mr} - 1 \Bigg) + \sum_{\substack{q=1,\dots,Q_1 \\ r=1,\dots,Q_2}} \mathbb{1}_{{(S^{1\prime}S^2)}_{qr}>0}
+    \text{NP}(\pi\rho\text{-}colBiSBM) = \sum_{m=1}^{M}\Bigg( \sum_{q=1}^{Q_1} S^1_{mq} - 1 \Bigg) + \sum_{m=1}^{M}\Bigg( \sum_{r=1}^{Q_2} S^2_{mr} - 1 \Bigg) + \sum_{\substack{q=1,\dots,Q_1 \\ r=1,\dots,Q_2}} \mathbb{1}_{{(S^{1\prime}S^2)}_{qr}>0}
 \end{equation*}


@ -463,34 +482,67 @@ we use a variatonal version of the Expectation Maximization (VEM) algorithm.

 We maximize a variational lower bound of the log-likelihood of the observed data
 by approximating $p(\bm{Z,W}|\bm{X};\bm{\theta})$ with a distribution on $\bm{Z}$
-and $\bm{W}$ named $\mathcal{R}$ issued from a family of factorizable distribution
-\parencite{daudinMixtureModelRandom2008}:
+and $\bm{W}$ named $\mathcal{R}$ defined as
+$\mathcal{R} = \bigotimes_{m=1}^M \mathcal{R}_m$.

-\[
-    \mathcal{J}(\mathcal{R};\bm{\theta}) \coloneqq \mathbb{E}_{\mathcal{R}}[\ell(\bm{X},\bm{Z},\bm{W};\bm{\theta})] + \mathcal{H}(\bm{Z,W}) \leq \ell(\bm{X};\bm{\theta})
-\]
-$\mathcal{H}$ is the entropy of the distribution. $\bm{Z}$ and $\bm{W}$ are
+The lower bound is defined as:
+\begin{equation*}
+    \mathcal{J}(\mathcal{R};\bm{\theta}) \coloneqq \sum_{m=1}^{M} \bigg( \mathbb{E}_{\mathcal{R}_m}[\ell(X^m,Z^m,W^m;\bm{\theta})] + \mathcal{H}(\mathcal{R}_m) \bigg)  \leq \ell(\bm{X};\bm{\theta})
+\end{equation*}
+
+$\bm{Z}$ and $\bm{W}$ are
 redefined using the \emph{one-hot encoded} conversion (i.e., $Z_i^m = q
-\rightarrow Z_{iq}^m = 1$ and $W_j^m = r \rightarrow W_{jr}^m = 1$)
-We define $\tau_{iq}^{1,m} = \mathbb{P}_{\mathcal{R}}(Z_{iq}^m = 1|X_{ij}^m)$
-and $\tau_{jr}^{2,m} = \mathbb{P}_{\mathcal{R}}(W_{jr}^m = 1|X_{ij}^m)$ and the
-variational approximation is
-$\mathbb{P}_{\mathcal{R}} (Z_{iq}^m = 1, W_{jr}^m = 1|X_{ij}^m) =
-\mathbb{P}_{\mathcal{R}}(Z_{iq}^m = 1|X_{ij}^m) {\color{red}\times} \mathbb{P}_{\mathcal{R}}(W_{jr}^m = 1|X_{ij}^m) = \tau_{iq}^{1,m} {\color{red}\times} \tau_{jr}^{2,m}$.
+\rightarrow Z_{iq}^m = 1$ and $W_j^m = r \rightarrow W_{jr}^m = 1$).\\ % W_{jr\prime}^m pour r != r égal 0
+
+When $\mathcal{R}_m$ is taken from the family of factorizable distributions,
+we denote
+$\tau_{iq}^{1,m} = \mathbb{P}_{\mathcal{R}_m}(Z_{iq}^m = 1|X_{ij}^m)$
+and $\tau_{jr}^{2,m} = \mathbb{P}_{\mathcal{R}_m}(W_{jr}^m = 1|X_{ij}^m)$, thus
+we have:
+$\mathbb{P}_{\mathcal{R}_m} (Z_{iq}^m = 1, W_{jr}^m = 1|X_{ij}^m) =
+\mathbb{P}_{\mathcal{R}_m}(Z_{iq}^m = 1|X_{ij}^m) {\color{red}\times} \mathbb{P}_{\mathcal{R}_m}(W_{jr}^m = 1|X_{ij}^m) = \tau_{iq}^{1,m} {\color{red}\times} \tau_{jr}^{2,m}$.
+
+
+The formula for the entropy per network is thus:
+\begin{equation*}
+    \mathcal{H}(\mathcal{R}_m) = - \sum_{i=1}^{n_1^m} \sum_{q \in \mathcal{Q}_{1,m}} \tau^{1,m}_{i,q} \log \tau^{1,m}_{i,q} - \sum_{j=1}^{n_2^m} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{2,m}_{j,r} \log \tau^{2,m}_{j,r}
+\end{equation*}
+
+And the expectation of the completed log-likelihood under the $\mathcal{R}_m$ variational distribution for network $m$ is:
+\begin{align*}
+    \mathbb{E}_{\mathcal{R}_m}[\ell(X^m,Z^m,W^m;\bm{\theta})] = \sum_{i = 1}^{n_1^m}\sum_{j=1}^{n_2^m}\sum_{q \in \mathcal{Q}_{1,m}} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{1,m}_{i,q} \tau^{2,m}_{j,r} \log f(X^{m}_{ij}; \alpha_{qr}) \\
+        + \sum_{i=1}^{n_1^m} \sum_{q \in \mathcal{Q}_{1,m}} \tau^{1,m}_{i,q} \log \pi_{\color{black}q}^{\color{gray}m} + \sum_{j=1}^{n_2^m} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{2,m}_{j,r} \log \rho_{\color{black}r}^{\color{gray}m}
+\end{align*}
+
+And thus the lower bound becomes:
+
+\begin{align*}
+    \mathcal{J}(\bm{\tau};\bm{\theta}) \coloneqq \sum_{m=1}^{M} \bigg(\sum_{i = 1}^{n_1^m}\sum_{j=1}^{n_2^m}\sum_{q \in \mathcal{Q}_{1,m}} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{1,m}_{i,q} \tau^{2,m}_{j,r} \log f(X^{m}_{ij}; \alpha_{qr}) \\
+        + \sum_{i=1}^{n_1^m} \sum_{q \in \mathcal{Q}_{1,m}} \tau^{1,m}_{i,q} \log \pi_{\color{black}q}^{\color{gray}m} + \sum_{j=1}^{n_2^m} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{2,m}_{j,r} \log \rho_{\color{black}r}^{\color{gray}m} \\
+        - \sum_{i=1}^{n_1^m} \sum_{q \in \mathcal{Q}_{1,m}} \tau^{1,m}_{i,q} \log \tau^{1,m}_{i,q} - \sum_{j=1}^{n_2^m} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{2,m}_{j,r} \log \tau^{2,m}_{j,r} \bigg) \color{black}
+\end{align*}
+
+where we identify the variational distribution $\mathcal{R}$ with its parameter
+$\bm{\tau}$. \\
+
+% \begin{equation*}
+%     \mathcal{J}(\mathcal{R};\bm{\theta}) \coloneqq \mathbb{E}_{\mathcal{R}}[\ell(\bm{X},\bm{Z},\bm{W};\bm{\theta})] + \mathcal{H}(\bm{Z,W}) \leq \ell(\bm{X};\bm{\theta})
+% \end{equation*}
+

 % TODO Develop the formula

 The VEM algorithm alternates between two steps, the variational E step and the M step.
-The E steps consists in optimizing $\mathcal{J}(\mathcal{R};\bm{\theta})$ for a
-current value of $\bm{\theta}$ with respect to $\mathcal{R}$. And the M step
-consists of maximizing $\mathcal{J}(\mathcal{R};\bm{\theta})$ with respect to
-$\bm{\theta}$ and for a given variational distribution $\mathcal{R}$.
+The E step consists in maximizing $\mathcal{J}(\bm{\tau};\bm{\theta})$ for a
+current value of $\bm{\theta}$ with respect to $\bm{\tau}$. And the M step
+consists of maximizing $\mathcal{J}(\bm{\tau};\bm{\theta})$ with respect to
+$\bm{\theta}$ and for a given variational distribution $\bm{\tau}$.

 \subsection{Variational E step}
 \label{ssec:variational-e-step}

-At this step we maximize with respect to $\bm{\tau}$:
-$$\widehat{\bm{\tau}}^{(t+1)} = \arg \max_{\bm{\tau}} \mathcal{J}(\mathcal{\bm{\tau}},\bm{\widehat{\theta}}^{(t)})$$
+At this step we maximize with respect to the variational distribution $\bm{\tau}$:
+$$\widehat{\bm{\tau}}^{(t+1)} = \arg \max_{\bm{\tau}} \mathcal{J}(\bm{\tau};\widehat{\bm{\theta}}^{(t)}).$$

 And we obtain the following formulae for the $\bm{\tau^m}$:

@ -498,10 +550,11 @@ And we obtain the following formulae for the $\bm{\tau^m}$:
    \widehat{\tau}_{iq}^{1,m} \propto \widehat{\pi}_{q}^{m(t)} \prod_{j=1}^{n_2^m}\prod_{r\in\mathcal{Q}_2^m} f(X_{ij}^m;\widehat{\alpha}_{qr}^{(t)})^{\widehat{\tau}_{jr}^{2,m(t+1)}}  & \forall i = 1, \dots , n_1^m, q \in \mathcal{Q}_1^m \\
    \widehat{\tau}_{jr}^{2,m} \propto \widehat{\rho}_{r}^{m(t)} \prod_{i=1}^{n_1^m}\prod_{q\in\mathcal{Q}_1^m} f(X_{ij}^m;\widehat{\alpha}_{qr}^{(t)})^{\widehat{\tau}_{iq}^{1,m(t+1)}} & \forall j = 1, \dots , n_2^m, r \in \mathcal{Q}_2^m
 \end{align*}
+which are used to update the values iteratively, performing a single step of
+the fixed-point algorithm.

-
-From the above formulae we obtain for the Bernoulli distribution:
 % TODO move to technical.tex
+% From the above formulae we obtain for the Bernoulli distribution:
 % \begin{itemize}
 %     \item[-] \textit{iid} :
 %         \[ \bm{\tau}^{m,1} = ~^{t}\pi + \exp((\text{Mask}^{m} \odot A^{m})
@ -589,7 +642,7 @@ We obtain the following expression
 \]
 with pen the penalties.\\
 Using the formula $\mathbb{E}_{\widehat{\mathcal{R}}} [\ell(\bm{X},\bm{Z},\bm{W};\bm{\theta})] \approx \ell (\bm{X};\bm{\theta}) - \mathcal{H}(\widehat{\mathcal{R}})$,
-it becomes evident, as highlighted in the existing literature, that the
+it becomes clearer, as highlighted in the existing literature, that the
 Integrated Completed Likelihood (ICL) gives preference to well-separated blocks
 by imposing a penalty on the entropy of node grouping. However, the objective of
 our study extends beyond grouping nodes into coherent blocks. We also aim to
@ -994,7 +1047,7 @@ networks and then compute the dissimilarity matrix between all networks of the
 collection. We obtain the collection $\mathcal{G} = \{\mathcal{M}\}$ the trivial
 partition in a unique group.

-Then using the \emph{KNN} we split the collection in two sub-collections with
+Then, using the \emph{k-means} algorithm, we split the collection into two sub-collections with
 the dissimilarity matrix. The two sub-collections are fitted and we compute
 the score of this new partition $\mathcal{G}^{*} = \{G_1, G_2\}$.

@ -1002,12 +1055,17 @@ If $Sc(\mathcal{G}^{*}) > Sc(\mathcal{G})$ then we repeat the same procedure on
 $G_1$ and $G_2$. Else we return $\mathcal{G}$.

 We illustrate our capacity to perform a partition of a collection for all
-colBiSBM models in \ref{ssec:network-clustering-of-simulated-networks}.
+colBiSBM models in Section~\ref{sec:network-clustering-of-simulated-networks}.

-\section{Simulation studies}\label{sec:simulation-studies}
-\subsection{Network clustering of simulated networks}\label{ssec:network-clustering-of-simulated-networks}
+\chapter{Simulation studies}\label{chap:simulation-studies}
+\section{Network clustering of simulated networks}\label{sec:network-clustering-of-simulated-networks}

-\section{Application to~\cite{doreRelativeEffectsAnthropogenic2021} data}\label{sec:application-to-dorerelativeeffectsanthropogenic2021-data}
+\chapter{Applications}
+\include{Rcodes/real_data/application_dore_data}
+% \include{Rcodes/real_data/presentation_dore}
+\subsection{Completing raw data using CoOPLBM \parencite{anakokDisentanglingStructureEcological2022}}
+
+\include{Rcodes/real_data/CoOPLBM_completion_analyze}

 \printbibliography
 \listoffigures
--- a/references.bib
+++ b/references.bib
@ -1,50 +1,56 @@
-@misc{anakokDisentanglingStructureEcological2022,
+@online{AccueilMIAParisSaclay,
+  title = {Accueil | {{MIA Paris-Saclay}}},
+  url = {https://mia-ps.inrae.fr/},
+  urldate = {2023-07-03},
+  file = {/home/polarolouis/Zotero/storage/I7FWTZC3/mia-ps.inrae.fr.html}
+}
+
+@online{anakokDisentanglingStructureEcological2022,
  title = {Disentangling the Structure of Ecological Bipartite Networks from Observation Processes},
  author = {Anakok, Emre and Barbillon, Pierre and Fontaine, Colin and Thebault, Elisa},
-  year = {2022},
-  month = nov,
-  number = {arXiv:2211.16364},
+  date = {2022-11-29},
  eprint = {2211.16364},
-  primaryclass = {stat},
-  publisher = {{arXiv}},
+  eprinttype = {arxiv},
+  eprintclass = {stat},
+  url = {http://arxiv.org/abs/2211.16364},
  urldate = {2023-06-14},
  abstract = {The structure of a bipartite interaction network can be described by providing a clustering for each of the two types of nodes. Such clusterings are outputted by fitting a Latent Block Model (LBM) on an observed network that comes from a sampling of species interactions in the field. However, the sampling is limited and possibly uneven. This may jeopardize the fit of the LBM and then the description of the structure of the network by detecting structures which result from the sampling and not from actual underlying ecological phenomena. If the observed interaction network consists of a weighted bipartite network where the number of observed interactions between two species is available, the sampling efforts for all species can be estimated and used to correct the LBM fit. We propose to combine an observation model that accounts for sampling and an LBM for describing the structure of underlying possible ecological interactions. We develop an original inference procedure for this model, the efficiency of which is demonstrated in simulation studies. The practical interest in ecology of our model is highlighted on a large dataset of plant-pollinator network.},
-  archiveprefix = {arxiv},
  langid = {english},
+  pubstate = {preprint},
  keywords = {Statistics - Methodology},
  file = {/home/polarolouis/Zotero/storage/LQ3FINZG/Anakok et al. - 2022 - Disentangling the structure of ecological bipartit.pdf}
 }

@article{aubertModelbasedBiclusteringOverdispersed2021,
  title = {Model-Based Biclustering for Overdispersed Count Data with Application in Microbial Ecology},
-  author = {Aubert, Julie and Schbath, Sophie and Robin, St{\'e}phane},
-  year = {2021},
-  journal = {Methods in Ecology and Evolution},
+  author = {Aubert, Julie and Schbath, Sophie and Robin, Stéphane},
+  date = {2021},
+  journaltitle = {Methods in Ecology and Evolution},
  volume = {12},
  number = {6},
  pages = {1050--1061},
  issn = {2041-210X},
  doi = {10.1111/2041-210X.13582},
+  url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/2041-210X.13582},
  urldate = {2023-06-22},
-  abstract = {Different studies have shown that microbial communities living in animals (humans included), in or around plants have a significant impact on health and disease of their host and on various services, such as adaptation under stressing environment. The basic input data to study microbiomes is a matrix representing abundance data of micro-organisms across different sampling units. Such a matrix typically corresponds to taxonomic profiles derived from the high-throughput sequencing of environmental samples. Biclustering is one way to study the interactions between the structure of micro-organism communities and the environmental samples they come from. We propose a latent block model (LBM) and an associated inference procedure for the biclustering of rows and columns of abundance matrices. The LBM assumes that micro-organisms (rows) and environmental samples (columns) can both be clustered into groups characterizing preferential interaction or avoidance. We use the Poisson\textendash Gamma distribution to model the overdispersion observed in microbial abundance data and introduce row and column effects to account for the sequencing effort in each sample and the mean abundance of each micro-organism. Because the latent variables are not independent conditionally on the observed ones, classical maximum likelihood inference is intractable. We then derive a variational-based inference algorithm and propose a strategy to select the number of biclusters. We illustrate the flexibility and performance of our approach both on a simulation study and on three ecological datasets. The model-based framework allows us to adapt to peculiarities of microbial ecological abundance data and allows us to explore relationships between entities of two different natures. We implemented our method in the cobiclust R package available on the CRAN and built a website with example of usage (https://julieaubert.github.io/cobiclust/cobiclust-example1.html).},
-  copyright = {\textcopyright{} 2021 British Ecological Society},
+  abstract = {Different studies have shown that microbial communities living in animals (humans included), in or around plants have a significant impact on health and disease of their host and on various services, such as adaptation under stressing environment. The basic input data to study microbiomes is a matrix representing abundance data of micro-organisms across different sampling units. Such a matrix typically corresponds to taxonomic profiles derived from the high-throughput sequencing of environmental samples. Biclustering is one way to study the interactions between the structure of micro-organism communities and the environmental samples they come from. We propose a latent block model (LBM) and an associated inference procedure for the biclustering of rows and columns of abundance matrices. The LBM assumes that micro-organisms (rows) and environmental samples (columns) can both be clustered into groups characterizing preferential interaction or avoidance. We use the Poisson–Gamma distribution to model the overdispersion observed in microbial abundance data and introduce row and column effects to account for the sequencing effort in each sample and the mean abundance of each micro-organism. Because the latent variables are not independent conditionally on the observed ones, classical maximum likelihood inference is intractable. We then derive a variational-based inference algorithm and propose a strategy to select the number of biclusters. We illustrate the flexibility and performance of our approach both on a simulation study and on three ecological datasets. The model-based framework allows us to adapt to peculiarities of microbial ecological abundance data and allows us to explore relationships between entities of two different natures. We implemented our method in the cobiclust R package available on the CRAN and built a website with example of usage (https://julieaubert.github.io/cobiclust/cobiclust-example1.html).},
  langid = {english},
-  keywords = {count data,latent block model,metabarcoding,microbial interactions,model-based biclustering,Poisson\textendash Gamma distribution,variational EM algorithm},
+  keywords = {count data,latent block model,metabarcoding,microbial interactions,model-based biclustering,Poisson–Gamma distribution,variational EM algorithm},
  file = {/home/polarolouis/Zotero/storage/A4V9MJAF/Aubert et al. - 2021 - Model-based biclustering for overdispersed count d.pdf}
 }

@article{biernackiAssessingMixtureModel2000,
  title = {Assessing a Mixture Model for Clustering with the Integrated Completed Likelihood},
  author = {Biernacki, C. and Celeux, G. and Govaert, G.},
-  year = {2000},
-  month = jul,
-  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
+  date = {2000-07},
+  journaltitle = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume = {22},
  number = {7},
  pages = {719--725},
  issn = {1939-3539},
  doi = {10.1109/34.865189},
   abstract = {We propose an assessing method of mixture model in a cluster analysis setting with integrated completed likelihood. For this purpose, the observed data are assigned to unknown clusters using a maximum a posteriori operator. Then, the integrated completed likelihood (ICL) is approximated using the Bayesian information criterion (BIC). Numerical experiments on simulated and real data of the resulting ICL criterion show that it performs well both for choosing a mixture model and a relevant number of clusters. In particular, ICL appears to be more robust than BIC to violation of some of the mixture model assumptions and it can select a number of clusters leading to a sensible partitioning of the data.},
+  eventtitle = {{{IEEE Transactions}} on {{Pattern Analysis}} and {{Machine Intelligence}}},
  keywords = {Bayesian methods,Context modeling,Gaussian distribution,Numerical simulation,Probability distribution,Robustness},
  file = {/home/polarolouis/Zotero/storage/MK9H446U/Biernacki et al. - 2000 - Assessing a mixture model for clustering with the .pdf}
 }
@ -52,35 +58,34 @@
@article{celisseConsistencyMaximumlikelihoodVariational2012,
  title = {Consistency of Maximum-Likelihood and Variational Estimators in the Stochastic Block Model},
  author = {Celisse, Alain and Daudin, Jean-Jacques and Pierre, Laurent},
-  year = {2012},
-  month = jan,
-  journal = {Electronic Journal of Statistics},
+  date = {2012-01},
+  journaltitle = {Electronic Journal of Statistics},
  volume = {6},
-  number = {none},
  pages = {1847--1899},
  publisher = {{Institute of Mathematical Statistics and Bernoulli Society}},
  issn = {1935-7524, 1935-7524},
  doi = {10.1214/12-EJS729},
+  url = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Consistency-of-maximum-likelihood-and-variational-estimators-in-the-stochastic/10.1214/12-EJS729.full},
  urldate = {2023-06-06},
  abstract = {The stochastic block model (SBM) is a probabilistic model designed to describe heterogeneous directed and undirected graphs. In this paper, we address the asymptotic inference in SBM by use of maximum-likelihood and variational approaches. The identifiability of SBM is proved while asymptotic properties of maximum-likelihood and variational estimators are derived. In particular, the consistency of these estimators is settled for the probability of an edge between two vertices (and for the group proportions at the price of an additional assumption), which is to the best of our knowledge the first result of this type for variational estimators in random graphs.},
+  issue = {none},
  keywords = {62E17,62G05,62G20,62H30,Concentration inequalities,consistency,maximum likelihood estimators,Random graphs,Stochastic block model,variational estimators},
  file = {/home/polarolouis/Zotero/storage/JNWRIYKG/celisse2012.pdf.pdf;/home/polarolouis/Zotero/storage/XG463B5I/Celisse et al. - 2012 - Consistency of maximum-likelihood and variational .pdf}
 }

-@misc{chabert-liddellLearningCommonStructures2023,
-  type = {Article},
+@online{chabert-liddellLearningCommonStructures2023,
+  type = {article},
  title = {Learning Common Structures in a Collection of Networks. {{An}} Application to Food Webs},
-  author = {{Chabert-Liddell}, Saint-Clair and Barbillon, Pierre and Donnet, Sophie},
-  year = {2023},
-  month = mar,
-  number = {arXiv:2206.00560},
+  author = {Chabert-Liddell, Saint-Clair and Barbillon, Pierre and Donnet, Sophie},
+  date = {2023-03-27},
  eprint = {2206.00560},
-  primaryclass = {stat},
-  publisher = {{arXiv}},
+  eprinttype = {arxiv},
+  eprintclass = {stat},
  doi = {10.48550/arXiv.2206.00560},
+  url = {http://arxiv.org/abs/2206.00560},
  urldate = {2023-05-22},
  abstract = {Let a collection of networks represent interactions within several (social or ecological) systems. We pursue two objectives: identifying similarities in the topological structures that are held in common between the networks and clustering the collection into sub-collections of structurally homogeneous networks. We tackle these two questions with a probabilistic model based approach. We propose an extension of the Stochastic Block Model (SBM) adapted to the joint modeling of a collection of networks. The networks in the collection are assumed to be independent realizations of SBMs. The common connectivity structure is imposed through the equality of some parameters. The model parameters are estimated with a variational Expectation-Maximization (EM) algorithm. We derive an ad-hoc penalized likelihood criterion to select the number of blocks and to assess the adequacy of the consensus found between the structures of the different networks. This same criterion can also be used to cluster networks on the basis of their connectivity structure. It thus provides a partition of the collection into subsets of structurally homogeneous networks. The relevance of our proposition is assessed on two collections of ecological networks. First, an application to three stream food webs reveals the homogeneity of their structures and the correspondence between groups of species in different ecosystems playing equivalent ecological roles. Moreover, the joint analysis allows a finer analysis of the structure of smaller networks. Second, we cluster 67 food webs according to their connectivity structures and demonstrate that five mesoscale structures are sufficient to describe this collection.},
-  archiveprefix = {arxiv},
+  pubstate = {preprint},
  keywords = {Statistics - Applications,Statistics - Methodology},
  file = {/home/polarolouis/Zotero/storage/M74TXGCF/Chabert-Liddell et al. - 2023 - Learning common structures in a collection of netw.pdf;/home/polarolouis/Zotero/storage/A35M8KNP/2206.html}
 }
@ -88,16 +93,17 @@
@article{daudinMixtureModelRandom2008,
  title = {A Mixture Model for Random Graphs},
  author = {Daudin, J.-J. and Picard, F. and Robin, S.},
-  year = {2008},
-  month = jun,
-  journal = {Statistics and Computing},
+  date = {2008-06-01},
+  journaltitle = {Statistics and Computing},
+  shortjournal = {Stat Comput},
  volume = {18},
  number = {2},
  pages = {173--183},
  issn = {1573-1375},
  doi = {10.1007/s11222-007-9046-7},
+  url = {https://doi.org/10.1007/s11222-007-9046-7},
  urldate = {2023-06-16},
-  abstract = {The Erd\"os\textendash R\'enyi model of a network is simple and possesses many explicit expressions for average and asymptotic properties, but it does not fit well to real-world networks. The vertices of those networks are often structured in unknown classes (functionally related proteins or social communities) with different connectivity properties. The stochastic block structures model was proposed for this purpose in the context of social sciences, using a Bayesian approach. We consider the same model in a frequentest statistical framework. We give the degree distribution and the clustering coefficient associated with this model, a variational method to estimate its parameters and a model selection criterion to select the number of classes. This estimation procedure allows us to deal with large networks containing thousands of vertices. The method is used to uncover the modular structure of a network of enzymatic reactions.},
+  abstract = {The Erdös–Rényi model of a network is simple and possesses many explicit expressions for average and asymptotic properties, but it does not fit well to real-world networks. The vertices of those networks are often structured in unknown classes (functionally related proteins or social communities) with different connectivity properties. The stochastic block structures model was proposed for this purpose in the context of social sciences, using a Bayesian approach. We consider the same model in a frequentest statistical framework. We give the degree distribution and the clustering coefficient associated with this model, a variational method to estimate its parameters and a model selection criterion to select the number of classes. This estimation procedure allows us to deal with large networks containing thousands of vertices. The method is used to uncover the modular structure of a network of enzymatic reactions.},
  langid = {english},
  keywords = {Mixture models,Random graphs,Variational~method},
  file = {/home/polarolouis/Zotero/storage/439HK27B/Daudin et al. - 2008 - A mixture model for random graphs.pdf;/home/polarolouis/Zotero/storage/HVVF5MNY/daudin2007.pdf.pdf}
@ -105,34 +111,35 @@

@article{desjardins-proulxEcologicalInteractionsNetflix2017,
  title = {Ecological Interactions and the {{Netflix}} Problem},
-  author = {{Desjardins-Proulx}, Philippe and Laigle, Idaline and Poisot, Timoth{\'e}e and Gravel, Dominique},
-  year = {2017},
-  month = aug,
-  journal = {PeerJ},
+  author = {Desjardins-Proulx, Philippe and Laigle, Idaline and Poisot, Timothée and Gravel, Dominique},
+  date = {2017-08-10},
+  journaltitle = {PeerJ},
+  shortjournal = {PeerJ},
  volume = {5},
  pages = {e3644},
  publisher = {{PeerJ Inc.}},
  issn = {2167-8359},
  doi = {10.7717/peerj.3644},
+  url = {https://peerj.com/articles/3644},
  urldate = {2023-06-15},
-  abstract = {Species interactions are a key component of ecosystems but we generally have an incomplete picture of who-eats-who in a given community. Different techniques have been devised to predict species interactions using theoretical models or abundances. Here, we explore the K nearest neighbour approach, with a special emphasis on recommendation, along with a supervised machine learning technique. Recommenders are algorithms developed for companies like Netflix to predict whether a customer will like a product given the preferences of similar customers. These machine learning techniques are well-suited to study binary ecological interactions since they focus on positive-only data. By removing a prey from a predator, we find that recommenders can guess the missing prey around 50\% of the times on the first try, with up to 881 possibilities. Traits do not improve significantly the results for the K nearest neighbour, although a simple test with a supervised learning approach (random forests) show we can predict interactions with high accuracy using only three traits per species. This result shows that binary interactions can be predicted without regard to the ecological community given only three variables: body mass and two variables for the species' phylogeny. These techniques are complementary, as recommenders can predict interactions in the absence of traits, using only information about other species' interactions, while supervised learning algorithms such as random forests base their predictions on traits only but do not exploit other species' interactions. Further work should focus on developing custom similarity measures specialized for ecology to improve the KNN algorithms and using richer data to capture indirect relationships between species.},
+  abstract = {Species interactions are a key component of ecosystems but we generally have an incomplete picture of who-eats-who in a given community. Different techniques have been devised to predict species interactions using theoretical models or abundances. Here, we explore the K nearest neighbour approach, with a special emphasis on recommendation, along with a supervised machine learning technique. Recommenders are algorithms developed for companies like Netflix to predict whether a customer will like a product given the preferences of similar customers. These machine learning techniques are well-suited to study binary ecological interactions since they focus on positive-only data. By removing a prey from a predator, we find that recommenders can guess the missing prey around 50\% of the times on the first try, with up to 881 possibilities. Traits do not improve significantly the results for the K nearest neighbour, although a simple test with a supervised learning approach (random forests) show we can predict interactions with high accuracy using only three traits per species. This result shows that binary interactions can be predicted without regard to the ecological community given only three variables: body mass and two variables for the species’ phylogeny. These techniques are complementary, as recommenders can predict interactions in the absence of traits, using only information about other species’ interactions, while supervised learning algorithms such as random forests base their predictions on traits only but do not exploit other species’ interactions. Further work should focus on developing custom similarity measures specialized for ecology to improve the KNN algorithms and using richer data to capture indirect relationships between species.},
  langid = {english},
  file = {/home/polarolouis/Zotero/storage/3L7JALP4/Desjardins-Proulx et al. - 2017 - Ecological interactions and the Netflix problem.pdf}
 }

@article{doreRelativeEffectsAnthropogenic2021,
  title = {Relative Effects of Anthropogenic Pressures, Climate, and Sampling Design on the Structure of Pollination Networks at the Global Scale},
-  author = {Dor{\'e}, Ma{\"e}l and Fontaine, Colin and Th{\'e}bault, Elisa},
-  year = {2021},
-  journal = {Global Change Biology},
+  author = {Doré, Maël and Fontaine, Colin and Thébault, Elisa},
+  date = {2021},
+  journaltitle = {Global Change Biology},
  volume = {27},
  number = {6},
  pages = {1266--1280},
  issn = {1365-2486},
  doi = {10.1111/gcb.15474},
+  url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/gcb.15474},
  urldate = {2023-06-21},
-  abstract = {Pollinators provide crucial ecosystem services that underpin to wild plant reproduction and yields of insect-pollinated crops. Understanding the relative impacts of anthropogenic pressures and climate on the structure of plant\textendash pollinator interaction networks is vital considering ongoing global change and pollinator decline. Our ability to predict the consequences of global change for pollinator assemblages worldwide requires global syntheses, but these analytical approaches may be hindered by variable methods among studies that either invalidate comparisons or mask biological phenomena. Here we conducted a synthetic analysis that assesses the relative impact of anthropogenic pressures and climatic variability, and accounts for heterogeneity in sampling methodology to reveal network responses at the global scale. We analyzed an extensive dataset, comprising 295 networks over 123 locations all over the world, and reporting over 50,000 interactions between flowering plant species and their insect visitors. Our study revealed that anthropogenic pressures correlate with an increase in generalism in pollination networks while pollinator richness and taxonomic composition are more related to climatic variables with an increase in dipteran pollinator richness associated with cooler temperatures. The contrasting response of species richness and generalism of the plant\textendash pollinator networks stresses the importance of considering interaction network structure alongside diversity in ecological monitoring. In addition, differences in sampling design explained more variation than anthropogenic pressures or climate on both pollination networks richness and generalism, highlighting the crucial need to report and incorporate sampling design in macroecological comparative studies of pollination networks. As a whole, our study reveals a potential human impact on pollination networks at a global scale. 
However, further research is needed to evaluate potential consequences of loss of specialist species and their unique ecological interactions and evolutionary pathways on the ecosystem pollination function at a global scale.},
-  copyright = {\textcopyright{} 2020 John Wiley \& Sons Ltd},
+  abstract = {Pollinators provide crucial ecosystem services that underpin to wild plant reproduction and yields of insect-pollinated crops. Understanding the relative impacts of anthropogenic pressures and climate on the structure of plant–pollinator interaction networks is vital considering ongoing global change and pollinator decline. Our ability to predict the consequences of global change for pollinator assemblages worldwide requires global syntheses, but these analytical approaches may be hindered by variable methods among studies that either invalidate comparisons or mask biological phenomena. Here we conducted a synthetic analysis that assesses the relative impact of anthropogenic pressures and climatic variability, and accounts for heterogeneity in sampling methodology to reveal network responses at the global scale. We analyzed an extensive dataset, comprising 295 networks over 123 locations all over the world, and reporting over 50,000 interactions between flowering plant species and their insect visitors. Our study revealed that anthropogenic pressures correlate with an increase in generalism in pollination networks while pollinator richness and taxonomic composition are more related to climatic variables with an increase in dipteran pollinator richness associated with cooler temperatures. The contrasting response of species richness and generalism of the plant–pollinator networks stresses the importance of considering interaction network structure alongside diversity in ecological monitoring. In addition, differences in sampling design explained more variation than anthropogenic pressures or climate on both pollination networks richness and generalism, highlighting the crucial need to report and incorporate sampling design in macroecological comparative studies of pollination networks. As a whole, our study reveals a potential human impact on pollination networks at a global scale. 
However, further research is needed to evaluate potential consequences of loss of specialist species and their unique ecological interactions and evolutionary pathways on the ecosystem pollination function at a global scale.},
  langid = {english},
  keywords = {anthropogenic pressures,climate,connectance,data,generalism,human impacts,plant-pollinator,pollination networks,richness,sampling effects,specialization},
  file = {/home/polarolouis/Zotero/storage/89ZXBJQP/10.1111@gcb.15474.pdf.pdf;/home/polarolouis/Zotero/storage/IVR6RGG7/Doré et al. - 2021 - Relative effects of anthropogenic pressures, clima.pdf;/home/polarolouis/Zotero/storage/WSJ4DV98/gcb.html}
@ -141,31 +148,31 @@
@article{govaertEMAlgorithmBlock2005,
  title = {An {{EM}} Algorithm for the Block Mixture Model},
  author = {Govaert, G. and Nadif, M.},
-  year = {2005},
-  month = apr,
-  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
+  date = {2005-04},
+  journaltitle = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume = {27},
  number = {4},
  pages = {643--647},
  issn = {1939-3539},
  doi = {10.1109/TPAMI.2005.69},
  abstract = {Although many clustering procedures aim to construct an optimal partition of objects or, sometimes, of variables, there are other methods, called block clustering methods, which consider simultaneously the two sets and organize the data into homogeneous blocks. Recently, we have proposed a new mixture model called block mixture model which takes into account this situation. This model allows one to embed simultaneous clustering of objects and variables in a mixture approach. We have studied this probabilistic model under the classification likelihood approach and developed a new algorithm for simultaneous partitioning based on the classification EM algorithm. In this paper, we consider the block clustering problem under the maximum likelihood approach and the goal of our contribution is to estimate the parameters of this model. Unfortunately, the application of the EM algorithm for the block mixture model cannot be made directly; difficulties arise due to the dependence structure in the model and approximations are required. Using a variational approximation, we propose a generalized EM algorithm to estimate the parameters of the block mixture model and, to illustrate our approach, we study the case of binary data by using a Bernoulli block mixture.},
+  eventtitle = {{{IEEE Transactions}} on {{Pattern Analysis}} and {{Machine Intelligence}}},
  keywords = {Approximation algorithms,Classification algorithms,Clustering algorithms,Clustering methods,Data mining,EM algorithm,Index Terms- Block mixture model,Maximum likelihood estimation,Parameter estimation,Partitioning algorithms,Self organizing feature maps,Sparse matrices,variational approximation.},
  file = {/home/polarolouis/Zotero/storage/6IG45HH2/govaert2005.pdf.pdf;/home/polarolouis/Zotero/storage/TL8M3XRF/Govaert et Nadif - 2005 - An EM algorithm for the block mixture model.pdf;/home/polarolouis/Zotero/storage/2Y48IB26/1401917.html}
 }

@article{govaertLatentBlockModel2010,
  title = {Latent {{Block Model}} for {{Contingency Table}}},
-  author = {Govaert, G{\'e}rard and Nadif, Mohamed},
-  year = {2010},
-  month = jan,
-  journal = {Communications in Statistics - Theory and Methods},
+  author = {Govaert, Gérard and Nadif, Mohamed},
+  date = {2010-01-13},
+  journaltitle = {Communications in Statistics - Theory and Methods},
  volume = {39},
  number = {3},
  pages = {416--425},
  publisher = {{Taylor \& Francis}},
  issn = {0361-0926},
  doi = {10.1080/03610920903140197},
+  url = {https://doi.org/10.1080/03610920903140197},
  urldate = {2023-06-15},
  abstract = {Although many clustering procedures aim to construct an optimal partition of objects or, sometimes, variables, there are other methods, called block clustering methods, which simultaneously consider the two sets and organize the data into homogeneous blocks. This kind of method has practical importance in a wide variety of applications such as text and market basket data analysis. Typically, the data that arise in these applications are arranged as a two-way contingency table. Using Poisson distributions, a latent block model for these data is proposed and, setting it under the maximum likelihood approach and the classification maximum likelihood approach, various algorithms are provided. Their performances are evaluated and compared to a simple use of EM or CEM applied separately on the rows and columns of the contingency table.},
  keywords = {62H17,62H30,Block clustering,Block Poisson mixture model,CEM algorithm,Contingency table,EM algorithm},
@ -176,14 +183,15 @@
  title = {Stochastic Blockmodels: {{First}} Steps},
  shorttitle = {Stochastic Blockmodels},
  author = {Holland, Paul W. and Laskey, Kathryn Blackmond and Leinhardt, Samuel},
-  year = {1983},
-  month = jun,
-  journal = {Social Networks},
+  date = {1983-06-01},
+  journaltitle = {Social Networks},
+  shortjournal = {Social Networks},
  volume = {5},
  number = {2},
  pages = {109--137},
  issn = {0378-8733},
  doi = {10.1016/0378-8733(83)90021-7},
+  url = {https://www.sciencedirect.com/science/article/pii/0378873383900217},
  urldate = {2023-06-15},
  abstract = {A stochastic model is proposed for social networks in which the actors in a network are partitioned into subgroups called blocks. The model provides a stochastic generalization of the blockmodel. Estimation techniques are developed for the special case of a single relation social network, with blocks specified a priori. An extension of the model allows for tendencies toward reciprocation of ties beyond those explained by the partition. The extended model provides a one degree-of-freedom test of the model. A numerical example from the social network literature is used to illustrate the methods.},
  langid = {english},
@ -192,46 +200,45 @@

@article{kaszewska-gilasGlobalStudiesHostParasite2021,
  title = {Global {{Studies}} of the {{Host-Parasite Relationships}} between {{Ectoparasitic Mites}} of the {{Family Syringophilidae}} and {{Birds}} of the {{Order Columbiformes}}},
-  author = {{Kaszewska-Gilas}, Katarzyna and Kosicki, Jakub Ziemowit and Hromada, Martin and Skoracki, Maciej},
-  year = {2021},
-  month = dec,
-  journal = {Animals},
+  author = {Kaszewska-Gilas, Katarzyna and Kosicki, Jakub Ziemowit and Hromada, Martin and Skoracki, Maciej},
+  date = {2021-12},
+  journaltitle = {Animals},
  volume = {11},
  number = {12},
  pages = {3392},
  publisher = {{Multidisciplinary Digital Publishing Institute}},
  issn = {2076-2615},
  doi = {10.3390/ani11123392},
+  url = {https://www.mdpi.com/2076-2615/11/12/3392},
  urldate = {2023-06-15},
-  abstract = {The quill mites belonging to the family Syringophilidae (Acari: Prostigmata: Cheyletoidea) are obligate ectoparasites of birds. They inhabit different types of the quills, where they spend their whole life cycle. In this paper, we conducted a global study of syringophilid mites associated with columbiform birds. We examined 772 pigeon and dove individuals belonging to 112 species (35\% world fauna) from all zoogeographical regions (except Madagascan) where Columbiformes occur. We measured the prevalence (IP) and the confidence interval (CI) for all infested host species. IP ranges between 4.2 and 66.7 (CI 0.2\textendash 100). We applied a bipartite analysis to determine host\textendash parasite interaction, network indices, and host specificity on species and whole network levels. The Syringophilidae\textendash Columbiformes network was composed of 25 mite species and 65 host species. The bipartite network was characterized by a high network level specialization H2{${'}$} = 0.93, high nestedness N = 0.908, connectance C = 0.90, and high modularity Q = 0.83, with 20 modules. Moreover, we reconstructed the phylogeny of the quill mites associated with columbiform birds on the generic level. Analysis shows two distinct clades: Meitingsunes + Psittaciphilus, and Peristerophila + Terratosyringophilus.},
-  copyright = {http://creativecommons.org/licenses/by/3.0/},
+  abstract = {The quill mites belonging to the family Syringophilidae (Acari: Prostigmata: Cheyletoidea) are obligate ectoparasites of birds. They inhabit different types of the quills, where they spend their whole life cycle. In this paper, we conducted a global study of syringophilid mites associated with columbiform birds. We examined 772 pigeon and dove individuals belonging to 112 species (35\% world fauna) from all zoogeographical regions (except Madagascan) where Columbiformes occur. We measured the prevalence (IP) and the confidence interval (CI) for all infested host species. IP ranges between 4.2 and 66.7 (CI 0.2–100). We applied a bipartite analysis to determine host–parasite interaction, network indices, and host specificity on species and whole network levels. The Syringophilidae–Columbiformes network was composed of 25 mite species and 65 host species. The bipartite network was characterized by a high network level specialization H2′ = 0.93, high nestedness N = 0.908, connectance C = 0.90, and high modularity Q = 0.83, with 20 modules. Moreover, we reconstructed the phylogeny of the quill mites associated with columbiform birds on the generic level. Analysis shows two distinct clades: Meitingsunes + Psittaciphilus, and Peristerophila + Terratosyringophilus.},
+  issue = {12},
  langid = {english},
  keywords = {Acari,biodiversity,bipartite-example,network,pigeons and doves,quill mites},
  file = {/home/polarolouis/Zotero/storage/VXVQ5CPH/Kaszewska-Gilas et al. - 2021 - Global Studies of the Host-Parasite Relationships .pdf}
 }

-@misc{larousseDefinitionsBipartiBipartite,
-  title = {{D\'efinitions : biparti, bipartite - Dictionnaire de fran\c{c}ais Larousse}},
-  shorttitle = {{D\'efinitions}},
-  author = {Larousse, {\'E}ditions},
+@online{larousseDefinitionsBipartiBipartite,
+  title = {Définitions : biparti, bipartite - Dictionnaire de français Larousse},
+  shorttitle = {Définitions},
+  author = {Larousse, Éditions},
+  url = {https://www.larousse.fr/dictionnaires/francais/biparti/9503},
  urldate = {2023-06-17},
-  abstract = {biparti, bipartite - D\'efinitions Fran\c{c}ais : Retrouvez la d\'efinition de biparti, bipartite, ainsi que les difficult\'es... - synonymes, homonymes, difficult\'es, citations.},
-  howpublished = {https://www.larousse.fr/dictionnaires/francais/biparti/9503},
+  abstract = {biparti, bipartite - Définitions Français : Retrouvez la définition de biparti, bipartite, ainsi que les difficultés... - synonymes, homonymes, difficultés, citations.},
  langid = {french},
  file = {/home/polarolouis/Zotero/storage/MA2VH6NX/9503.html}
 }

@article{maeldoreMaelDorePollinationNetworks2020,
-  title = {{{MaelDore}}/{{Pollination}}\_networks: {{R}} Scripts for {{Dor\'e}} et al., 2020 - {{Relative}} Effects of Anthropogenic Pressures, Climate, and Sampling Design on the Structure of Pollination Networks at the Global Scale},
+  title = {{{MaelDore}}/{{Pollination}}\_networks: {{R}} Scripts for {{Doré}} et al., 2020 - {{Relative}} Effects of Anthropogenic Pressures, Climate, and Sampling Design on the Structure of Pollination Networks at the Global Scale},
  shorttitle = {{{MaelDore}}/{{Pollination}}\_networks},
  author = {MaelDore},
-  year = {2020},
-  month = nov,
+  date = {2020-11-25},
  publisher = {{Zenodo}},
  doi = {10.5281/ZENODO.4290503},
+  url = {https://zenodo.org/record/4290503},
  urldate = {2023-06-21},
-  abstract = {R scripts for Dor\'e et al., 2020 - Relative effects of anthropogenic pressures, climate, and sampling design on the structure of pollination networks at the global scale},
-  copyright = {Open Access},
+  abstract = {R scripts for Doré et al., 2020 - Relative effects of anthropogenic pressures, climate, and sampling design on the structure of pollination networks at the global scale},
  keywords = {data,plant-pollinator}
 }

@ -239,32 +246,34 @@
  title = {Bipartite Graphs in Systems Biology and Medicine: A Survey of Methods and Applications},
  shorttitle = {Bipartite Graphs in Systems Biology and Medicine},
  author = {Pavlopoulos, Georgios A and Kontou, Panagiota I and Pavlopoulou, Athanasia and Bouyioukos, Costas and Markou, Evripides and Bagos, Pantelis G},
-  year = {2018},
-  month = apr,
-  journal = {GigaScience},
+  date = {2018-04-01},
+  journaltitle = {GigaScience},
+  shortjournal = {GigaScience},
  volume = {7},
  number = {4},
  pages = {giy014},
  issn = {2047-217X},
  doi = {10.1093/gigascience/giy014},
+  url = {https://doi.org/10.1093/gigascience/giy014},
  urldate = {2023-06-15},
  abstract = {The latest advances in high-throughput techniques during the past decade allowed the systems biology field to expand significantly. Today, the focus of biologists has shifted from the study of individual biological components to the study of complex biological systems and their dynamics at a larger scale. Through the discovery of novel bioentity relationships, researchers reveal new information about biological functions and processes. Graphs are widely used to represent bioentities such as proteins, genes, small molecules, ligands, and others such as nodes and their connections as edges within a network. In this review, special focus is given to the usability of bipartite graphs and their impact on the field of network biology and medicine. Furthermore, their topological properties and how these can be applied to certain biological case studies are discussed. Finally, available methodologies and software are presented, and useful insights on how bipartite graphs can shape the path toward the solution of challenging biological problems are provided.},
  file = {/home/polarolouis/Zotero/storage/2KJFL3SB/Pavlopoulos et al. - 2018 - Bipartite graphs in systems biology and medicine .pdf;/home/polarolouis/Zotero/storage/A2Y2EGPA/pavlopoulos2018.pdf.pdf;/home/polarolouis/Zotero/storage/UK2MK5FW/pavlopoulos2018.pdf.pdf;/home/polarolouis/Zotero/storage/XP7G4PZF/4875933.html}
 }

@article{ramos-jilibertoTopologicalChangeAndean2010,
-  title = {Topological Change of {{Andean}} Plant\textendash Pollinator Networks along an Altitudinal Gradient},
-  author = {{Ramos-Jiliberto}, Rodrigo and Dom{\'i}nguez, Daniela and Espinoza, Claudia and L{\'o}pez, Gioconda and Valdovinos, Fernanda S. and Bustamante, Ramiro O. and Medel, Rodrigo},
-  year = {2010},
-  month = mar,
-  journal = {Ecological Complexity},
+  title = {Topological Change of {{Andean}} Plant–Pollinator Networks along an Altitudinal Gradient},
+  author = {Ramos-Jiliberto, Rodrigo and Domínguez, Daniela and Espinoza, Claudia and López, Gioconda and Valdovinos, Fernanda S. and Bustamante, Ramiro O. and Medel, Rodrigo},
+  date = {2010-03-01},
+  journaltitle = {Ecological Complexity},
+  shortjournal = {Ecological Complexity},
  volume = {7},
  number = {1},
  pages = {86--90},
  issn = {1476-945X},
  doi = {10.1016/j.ecocom.2009.06.001},
+  url = {https://www.sciencedirect.com/science/article/pii/S1476945X09000622},
  urldate = {2023-06-15},
-  abstract = {Pollination interaction networks exhibit structural regularities across a wide range of natural environments. Long-tailed degree distribution, nestedness, and modularity are the most prevalent topological patterns found in most bipartite networks analyzed up to day. In this work we evaluate the variation of these topological properties along an altitudinal gradient. To this end, we examined four plant\textendash pollinator networks from the Chilean Andes at 33\textdegree S, in range from 1800 to 3600m elevation. Our results indicate that network topology is strongly and systematically affected by elevation. At increasing altitude, the number of potential visitors per plant decreased, and species' degree distributions are closer to random expectations. On the other hand, the nested structure of mutualistic interactions systematically decreased with elevation, and network modularity was significantly higher than random expectations over the entire altitudinal range. In addition, at increasing elevations the pollination networks were organized in fewer and more strongly connected modules. Our results suggest that the severe abiotic conditions found at increased elevations translate into less organized pollination networks.},
+  abstract = {Pollination interaction networks exhibit structural regularities across a wide range of natural environments. Long-tailed degree distribution, nestedness, and modularity are the most prevalent topological patterns found in most bipartite networks analyzed up to day. In this work we evaluate the variation of these topological properties along an altitudinal gradient. To this end, we examined four plant–pollinator networks from the Chilean Andes at 33°S, in range from 1800 to 3600m elevation. Our results indicate that network topology is strongly and systematically affected by elevation. At increasing altitude, the number of potential visitors per plant decreased, and species’ degree distributions are closer to random expectations. On the other hand, the nested structure of mutualistic interactions systematically decreased with elevation, and network modularity was significantly higher than random expectations over the entire altitudinal range. In addition, at increasing elevations the pollination networks were organized in fewer and more strongly connected modules. Our results suggest that the severe abiotic conditions found at increased elevations translate into less organized pollination networks.},
  langid = {english},
  keywords = {bipartite-example,Chile,Complexity,Degree distribution,Modularity,Mutualistic networks,Nestedness,Power law},
  file = {/home/polarolouis/Zotero/storage/ATY3ZP2X/Ramos-Jiliberto et al. - 2010 - Topological change of Andean plant–pollinator netw.pdf;/home/polarolouis/Zotero/storage/HPBGUP65/ramos-jiliberto2010.pdf.pdf;/home/polarolouis/Zotero/storage/I33MZQQ7/ramos-jiliberto2010.pdf.pdf;/home/polarolouis/Zotero/storage/YJX8XBNW/S1476945X09000622.html}
@ -273,14 +282,15 @@
@article{snijdersEstimationPredictionStochastic1997,
  title = {Estimation and {{Prediction}} for {{Stochastic Blockmodels}} for {{Graphs}} with {{Latent Block Structure}}},
  author = {Snijders, Tom A.B. and Nowicki, Krzysztof},
-  year = {1997},
-  month = jan,
-  journal = {Journal of Classification},
+  date = {1997-01-01},
+  journaltitle = {Journal of Classification},
+  shortjournal = {J. of Classification},
  volume = {14},
  number = {1},
  pages = {75--100},
  issn = {1432-1343},
  doi = {10.1007/s003579900004},
+  url = {https://doi.org/10.1007/s003579900004},
  urldate = {2023-06-15},
  abstract = {A statistical approach to a posteriori blockmodeling for graphs is proposed. The model assumes that the vertices of the graph are partitioned into two unknown blocks and that the probability of an edge between two vertices depends only on the blocks to which they belong. Statistical procedures are derived for estimating the probabilities of edges and for predicting the block structure from observations of the edge pattern only. ML estimators can be computed using the EM algorithm, but this strategy is practical only for small graphs. A Bayesian estimator, based on the Gibbs sampling, is proposed. This estimator is practical also for large graphs. When ML estimators are used, the block structure can be predicted based on predictive likelihood. When Gibbs sampling is used, the block structure can be predicted from posterior predictive probabilities. A side result is that when the number of vertices tends to infinity while the probabilities remain constant, the block structure can be recovered correctly with probability tending to 1.},
  langid = {english},
@ -288,63 +298,66 @@
  file = {/home/polarolouis/Zotero/storage/2GYRASW5/snijders1997.pdf.pdf;/home/polarolouis/Zotero/storage/JJNQV32Y/Snijders et Nowicki - 1997 - Estimation and Prediction for Stochastic Blockmode.pdf;/home/polarolouis/Zotero/storage/LXGG9SRP/snijders1997.pdf.pdf}
 }

-@misc{thebaultDatabasePlantpollinatorNetworks2020,
+@dataset{thebaultDatabasePlantpollinatorNetworks2020,
  title = {A Database of Plant-Pollinator Networks},
-  author = {Th{\'e}bault, Elisa and Fontaine, Colin},
-  year = {2020},
-  month = dec,
+  author = {Thébault, Elisa and Fontaine, Colin},
+  date = {2020-12-01},
  publisher = {{Zenodo}},
  doi = {10.5281/zenodo.4300427},
+  url = {https://zenodo.org/record/4300427},
  urldate = {2023-06-21},
  abstract = {This database assembles different published datasets of observed interaction networks between plants and pollinators, which were extracted from articles, theses and existing online databases. Each row in the data table corresponds to an interaction between a plant and a pollinator species reported at a given site by a given publication.},
+  version = {1},
  keywords = {diversity,flower visitors,mutualistic network,plant-pollinator interaction}
 }

-@misc{thebaultelisaDatabasePlantpollinatorNetworks2020,
+@dataset{thebaultelisaDatabasePlantpollinatorNetworks2020,
  title = {A Database of Plant-Pollinator Networks},
-  author = {Th{\'e}bault, Elisa and Fontaine, Colin},
-  year = {2020},
-  month = dec,
+  author = {Thébault, Elisa and Fontaine, Colin},
+  date = {2020-12-01},
  publisher = {{Zenodo}},
  doi = {10.5281/ZENODO.4300427},
+  url = {https://zenodo.org/record/4300427},
  urldate = {2023-06-21},
  abstract = {This database assembles different published datasets of observed interaction networks between plants and pollinators, which were extracted from articles, theses and existing online databases. Each row in the data table corresponds to an interaction between a plant and a pollinator species reported at a given site by a given publication.},
-  copyright = {Creative Commons Attribution 4.0 International, Open Access},
+  version = {1},
  keywords = {data,diversity,flower visitors,mutualistic network,plant-pollinator,plant-pollinator interaction}
 }

-@misc{thebaultelisaDatabasePlantpollinatorNetworks2022,
+@dataset{thebaultelisaDatabasePlantpollinatorNetworks2022,
  title = {A Database of Plant-Pollinator Networks},
-  author = {Th{\'e}bault, Elisa and Fontaine, Colin},
-  year = {2022},
-  month = jun,
+  author = {Thébault, Elisa and Fontaine, Colin},
+  editora = {Doré, Maël and Parra, Santiago},
+  editoratype = {collaborator},
+  date = {2022-06-10},
  publisher = {{Zenodo}},
  doi = {10.5281/ZENODO.6630184},
+  url = {https://zenodo.org/record/6630184},
  urldate = {2023-06-21},
  abstract = {This database assembles different published datasets of observed interaction networks between plants and pollinators, which were extracted from articles, theses and existing online databases. Each row in the data table corresponds to an interaction between a plant and a pollinator species reported at a given site by a given publication.},
-  collaborator = {Dor{\'e}, Ma{\"e}l and Parra, Santiago},
-  copyright = {Creative Commons Attribution 4.0 International, Open Access},
+  version = {2},
  keywords = {data,diversity,flower visitors,mutualistic network,plant-pollinator,plant-pollinator interaction}
 }

-@misc{thebaultelisaDatabasePlantpollinatorNetworks2022a,
+@dataset{thebaultelisaDatabasePlantpollinatorNetworks2022a,
  title = {A Database of Plant-Pollinator Networks},
-  author = {Th{\'e}bault, Elisa and Fontaine, Colin},
-  year = {2022},
-  month = jun,
+  author = {Thébault, Elisa and Fontaine, Colin},
+  editora = {Doré, Maël and Parra, Santiago},
+  editoratype = {collaborator},
+  date = {2022-06-10},
  publisher = {{Zenodo}},
  doi = {10.5281/ZENODO.4300426},
+  url = {https://zenodo.org/record/4300426},
  urldate = {2023-06-21},
  abstract = {This database assembles different published datasets of observed interaction networks between plants and pollinators, which were extracted from articles, theses and existing online databases. Each row in the data table corresponds to an interaction between a plant and a pollinator species reported at a given site by a given publication.},
-  collaborator = {Dor{\'e}, Ma{\"e}l and Parra, Santiago},
-  copyright = {Creative Commons Attribution 4.0 International, Open Access},
+  version = {2},
  keywords = {data,diversity,flower visitors,mutualistic network,plant-pollinator,plant-pollinator interaction}
 }

-@misc{WebLifeEcological,
+@online{WebLifeEcological,
  title = {Web of {{Life}}: Ecological Networks Database},
+  url = {https://www.web-of-life.es/map.php},
  urldate = {2023-06-17},
-  howpublished = {https://www.web-of-life.es/map.php},
  keywords = {networks,site},
  file = {/home/polarolouis/Zotero/storage/9WZE8QLQ/map.html}
 }