Adding new files to the report

This commit is contained in:
Louis Lacoste 2023-07-03 15:10:39 +02:00
parent a11c85f09c
commit 7b898bc258
56 changed files with 1143 additions and 1515 deletions

View file

@ -1,17 +1,11 @@
--- ---
title: "Netclustering analysis with the CoOPLBM completion"
bibliography: references.bib
suppress-bibliography: true
output: output:
html_document: md_document:
toc: true citation_package: biblatex
theme: journal
pdf_document:
keep_tex: true
--- ---
```{r libraries, echo = FALSE, include=FALSE} ```{r libraries, echo = FALSE, include=FALSE}
devtools::load_all() require("colSBM")
require(aricode) require(aricode)
``` ```
@ -77,10 +71,10 @@ extract_full_reorder <- function(model_collections_list, target) {
```{r data_importation, echo = FALSE} ```{r data_importation, echo = FALSE}
# Uncompleted # Uncompleted
uncompleted_model_list <- list( uncompleted_model_list <- list(
"iid" = extract_unlist("real_data/data/dore_uncompleted_collection_clustering_nb_run_1_iid_70_networks_08-06-23-16:31:17.Rds"), "iid" = extract_unlist("data/dore_uncompleted_collection_clustering_nb_run_1_iid_70_networks_08-06-23-16:31:17.Rds"),
"pi" = extract_unlist("real_data/data/dore_uncompleted_collection_clustering_nb_run_1_pi_70_networks_08-06-23-16:52:16.Rds"), "pi" = extract_unlist("data/dore_uncompleted_collection_clustering_nb_run_1_pi_70_networks_08-06-23-16:52:16.Rds"),
"rho" = extract_unlist("real_data/data/dore_uncompleted_collection_clustering_nb_run_1_rho_70_networks_08-06-23-16:49:58.Rds"), "rho" = extract_unlist("data/dore_uncompleted_collection_clustering_nb_run_1_rho_70_networks_08-06-23-16:49:58.Rds"),
"pirho" = extract_unlist("real_data/data/dore_uncompleted_collection_clustering_nb_run_1_pirho_70_networks_08-06-23-16:41:33.Rds") "pirho" = extract_unlist("data/dore_uncompleted_collection_clustering_nb_run_1_pirho_70_networks_08-06-23-16:41:33.Rds")
) )
# Below we will need to have the netid in the same order so we choose to use the # Below we will need to have the netid in the same order so we choose to use the
@ -92,38 +86,38 @@ uncompleted_clusterings <- extract_full_reorder(uncompleted_model_list, netid_or
# 0.2 threshold # 0.2 threshold
point_2_model_list <- list( point_2_model_list <- list(
"iid" = extract_unlist("real_data/data/dore_point_2_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-18:40:10.Rds"), "iid" = extract_unlist("data/dore_point_2_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-18:40:10.Rds"),
"pi" = extract_unlist("real_data/data/dore_point_2_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-19:22:19.Rds"), "pi" = extract_unlist("data/dore_point_2_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-19:22:19.Rds"),
"rho" = extract_unlist("real_data/data/dore_point_2_completed_collection_clustering_nb_run_1_rho_70_networks_07-06-23-20:03:53.Rds"), "rho" = extract_unlist("data/dore_point_2_completed_collection_clustering_nb_run_1_rho_70_networks_07-06-23-20:03:53.Rds"),
"pirho" = extract_unlist("real_data/data/dore_point_2_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-21:09:12.Rds") "pirho" = extract_unlist("data/dore_point_2_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-21:09:12.Rds")
) )
point_2_clusterings <- extract_full_reorder(point_2_model_list, netid_order) point_2_clusterings <- extract_full_reorder(point_2_model_list, netid_order)
# 0.5 threshold # 0.5 threshold
point_5_model_list <- list( point_5_model_list <- list(
"iid" = extract_unlist("real_data/data/dore_point_5_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-19:19:53.Rds"), "iid" = extract_unlist("data/dore_point_5_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-19:19:53.Rds"),
"pi" = extract_unlist("real_data/data/dore_point_5_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-21:31:20.Rds"), "pi" = extract_unlist("data/dore_point_5_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-21:31:20.Rds"),
"rho" = extract_unlist("real_data/data/dore_point_5_completed_collection_clustering_nb_run_1_rho_70_networks_07-06-23-21:03:50.Rds"), "rho" = extract_unlist("data/dore_point_5_completed_collection_clustering_nb_run_1_rho_70_networks_07-06-23-21:03:50.Rds"),
"pirho" = extract_unlist("real_data/data/dore_point_5_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-21:13:10.Rds") "pirho" = extract_unlist("data/dore_point_5_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-21:13:10.Rds")
) )
point_5_clusterings <- extract_full_reorder(point_5_model_list, netid_order) point_5_clusterings <- extract_full_reorder(point_5_model_list, netid_order)
# Uniform re-sampled # Uniform re-sampled
random_model_list <- list( random_model_list <- list(
"iid" = extract_unlist("real_data/data/dore_random_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-21:44:14.Rds"), "iid" = extract_unlist("data/dore_random_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-21:44:14.Rds"),
"pi" = extract_unlist("real_data/data/dore_random_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-22:52:47.Rds"), "pi" = extract_unlist("data/dore_random_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-22:52:47.Rds"),
"rho" = extract_unlist("real_data/data/dore_random_completed_collection_clustering_nb_run_1_rho_70_networks_08-06-23-18:16:04.Rds"), "rho" = extract_unlist("data/dore_random_completed_collection_clustering_nb_run_1_rho_70_networks_08-06-23-18:16:04.Rds"),
"pirho" = extract_unlist("real_data/data/dore_random_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-23:07:08.Rds") "pirho" = extract_unlist("data/dore_random_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-23:07:08.Rds")
) )
random_clusterings <- extract_full_reorder(random_model_list, netid_order) random_clusterings <- extract_full_reorder(random_model_list, netid_order)
``` ```
# Context of this analysis ### Context of this analysis
After performing a netclustering on the raw data, we will see if the detected After performing a netclustering on the raw data, we will see if the detected
structure resulting in the clustering comes from the sampling effort. To test structure resulting in the clustering comes from the sampling effort. To test
this we will use the CoOPLBM model by this we will use the CoOPLBM model by
@anakokDisentanglingStructureEcological2022 to complete the data. \cite{anakokDisentanglingStructureEcological2022} to complete the data.
The CoOPLBM model assumes that the observed incidence matrix $R$ is an The CoOPLBM model assumes that the observed incidence matrix $R$ is an
element-wise product of an $M$ matrix following an LBM and an $N$ matrix which element-wise product of an $M$ matrix following an LBM and an $N$ matrix which
@ -141,7 +135,7 @@ Note that if $R_{ij} = 1$ then $\widehat{M_{ij}} = 1$
This *completed matrix* can be used in different manners to be fed to the colSBM This *completed matrix* can be used in different manners to be fed to the colSBM
model. model.
# Threshold based completions ### Threshold based completions
With the thresholds, the inferred incidence matrix obtained by With the thresholds, the inferred incidence matrix obtained by
CoOPLBM is used to generate a completed incidence matrix by the following CoOPLBM is used to generate a completed incidence matrix by the following
procedure : procedure :
@ -150,7 +144,7 @@ $$X_{ij} = \begin{cases}
0 & \text{else} \\ 0 & \text{else} \\
\end{cases}$$ \end{cases}$$
## 0.5 completed threshold #### 0.5 completed threshold
```{r useful-functions, echo = FALSE, include=FALSE} ```{r useful-functions, echo = FALSE, include=FALSE}
ARI_netclustering_models <- function( ARI_netclustering_models <- function(
clustering_compare, clustering_compare,
@ -183,11 +177,11 @@ knitr::kable(ARI_netclustering_models(point_5_clusterings),
In the above table, one can see the network clustering obtained after applying In the above table, one can see the network clustering obtained after applying
CoOPLBM has not much in common with the clustering of the uncompleted data. CoOPLBM has not much in common with the clustering of the uncompleted data.
### Number of sub-collections and details of each sub-collection ##### Number of sub-collections and details of each sub-collection
```{r 0.5_partition_numbers, echo = FALSE} ```{r 0.5_partition_numbers, echo = FALSE}
``` ```
## 0.2 completed threshold ### 0.2 completed threshold
The $0.2$ threshold adds a lot of interactions compared to raw matrix. The $0.2$ threshold adds a lot of interactions compared to raw matrix.
@ -197,7 +191,10 @@ knitr::kable(ARI_netclustering_models(point_2_clusterings),
) )
``` ```
# Sample based completions Same as for $0.5$, after applying CoOPLBM the obtained clustering doesn't match
the uncompleted data.
### Sample based completions
The $M$ matrix is used to sample a new $X$ matrix which elements are the The $M$ matrix is used to sample a new $X$ matrix which elements are the
realisation of Bernoulli distributions of probability $M_{i,j}$. realisation of Bernoulli distributions of probability $M_{i,j}$.

View file

@ -1,90 +1,22 @@
% Options for packages loaded elsewhere
\PassOptionsToPackage{unicode}{hyperref}
\PassOptionsToPackage{hyphens}{url}
%
\documentclass[
]{article}
\usepackage{lmodern}
\usepackage{amssymb,amsmath}
\usepackage{ifxetex,ifluatex}
\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{textcomp} % provide euro and other symbols
\else % if luatex or xetex
\usepackage{unicode-math}
\defaultfontfeatures{Scale=MatchLowercase}
\defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1}
\fi
% Use upquote if available, for straight quotes in verbatim environments
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\IfFileExists{microtype.sty}{% use microtype if available
\usepackage[]{microtype}
\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
}{}
\makeatletter
\@ifundefined{KOMAClassName}{% if non-KOMA class
\IfFileExists{parskip.sty}{%
\usepackage{parskip}
}{% else
\setlength{\parindent}{0pt}
\setlength{\parskip}{6pt plus 2pt minus 1pt}}
}{% if KOMA class
\KOMAoptions{parskip=half}}
\makeatother
\usepackage{xcolor}
\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available
\IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}}
\hypersetup{
pdftitle={Netclustering analysis with the CoOPLBM completion},
hidelinks,
pdfcreator={LaTeX via pandoc}}
\urlstyle{same} % disable monospaced font for URLs
\usepackage[margin=1in]{geometry}
\usepackage{graphicx}
\makeatletter
\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
\makeatother
% Scale images if necessary, so that they will not overflow the page
% margins by default, and it is still possible to overwrite the defaults
% using explicit options in \includegraphics[width, height, ...]{}
\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
% Set default figure placement to htbp
\makeatletter
\def\fps@figure{htbp}
\makeatother
\setlength{\emergencystretch}{3em} % prevent overfull lines
\providecommand{\tightlist}{%
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
\setcounter{secnumdepth}{-\maxdimen} % remove section numbering
\newlength{\cslhangindent}
\setlength{\cslhangindent}{1.5em}
\newenvironment{cslreferences}%
{\setlength{\parindent}{0pt}%
\everypar{\setlength{\hangindent}{\cslhangindent}}\ignorespaces}%
{\par}
\title{Netclustering analysis with the CoOPLBM completion}
\author{}
\date{\vspace{-2.5em}}
\begin{document}
\maketitle
\hypertarget{context-of-this-analysis}{% \hypertarget{context-of-this-analysis}{%
\section{Context of this analysis}\label{context-of-this-analysis}} \subsubsection{Context of this
analysis}\label{context-of-this-analysis}}
After performing a netclustering on the raw data, we will see if the After performing a netclustering on the raw data, we will see if the
detect structure resulting in the clustering comes from the sampling detect structure resulting in the clustering comes from the sampling
effort. To test this we will use the CoOPLBM model by Anakok et al. effort. To test this we will use the CoOPLBM model by
(2022) to complete the data. \cite{anakokDisentanglingStructureEcological2022} to complete the data.
The CoOPLBM model assumes that the observed incidence matrix \(R\) is an The CoOPLBM model assumes that the observed incidence matrix \(R\) is an
element-wise product of an \(M\) matrix following an LBM and an \(N\) element-wise product of an \(M\) matrix following an LBM and an \(N\)
matrix which elements follow Poisson distributions independent on \(M\). matrix which elements follow Poisson distributions independent on \(M\).
The model gives us the \(\hat{M}\) matrix, which elements are: The model gives us the \(\widehat{M}\) matrix, the elements of which
are:
\[\widehat{M_{ij}} = \mathbb{P}(M_{ij} = 1)\]
Note that if \(R_{ij} = 1\) then \(\widehat{M_{ij}} = 1\)
\begin{itemize} \begin{itemize}
\tightlist \tightlist
@ -99,7 +31,7 @@ This \emph{completed matrix} can be used in different manners to be fed
to the colSBM model. to the colSBM model.
\hypertarget{threshold-based-completions}{% \hypertarget{threshold-based-completions}{%
\section{Threshold based \subsubsection{Threshold based
completions}\label{threshold-based-completions}} completions}\label{threshold-based-completions}}
With the thresholds, the inferred incidence matrix obtained by CoOPLBM is With the thresholds, the inferred incidence matrix obtained by CoOPLBM is
@ -110,32 +42,69 @@ used to generate a completed incidence matrix by the following procedure
\end{cases}\] \end{cases}\]
\hypertarget{completed-threshold}{% \hypertarget{completed-threshold}{%
\subsection{0.5 completed threshold}\label{completed-threshold}} \paragraph{0.5 completed threshold}\label{completed-threshold}}
Here, the completion threshold is set to \(0.5\). Here, the completion threshold is set to \(0.5\).
\hypertarget{ari-of-networks-clustering-0.5-threshold-vs-raw-data}{% First we will compute an ARI on the collection id given by the raw data
\subsubsection{ARI of networks clustering: 0.5 threshold vs raw and the completed matrix.
data}\label{ari-of-networks-clustering-0.5-threshold-vs-raw-data}}
\begin{longtable}[]{@{}lr@{}}
\toprule
& ARI with uncompleted data\tabularnewline
\midrule
\endhead
iid & 0.1142823\tabularnewline
pi & 0.0263660\tabularnewline
rho & 0.0933340\tabularnewline
pirho & 0.2158747\tabularnewline
\bottomrule
\end{longtable}
In the above table, one can see the network clustering obtained after
applying CoOPLBM has not much in common with the clustering of the
uncompleted data.
\hypertarget{number-of-sub-collections-and-details-of-each-sub-collection}{%
\subparagraph{Number of sub-collections and details of each
sub-collection}\label{number-of-sub-collections-and-details-of-each-sub-collection}}
\hypertarget{completed-threshold-1}{%
\subsubsection{0.2 completed threshold}\label{completed-threshold-1}}
The \(0.2\) threshold adds a lot of interactions compared to raw matrix.
\begin{longtable}[]{@{}lr@{}}
\toprule
& ARI with uncompleted data\tabularnewline
\midrule
\endhead
iid & 0.0429465\tabularnewline
pi & 0.0330057\tabularnewline
rho & 0.0187305\tabularnewline
pirho & 0.0357728\tabularnewline
\bottomrule
\end{longtable}
Same as for \(0.5\), after applying CoOPLBM the obtained clustering
doesn't match the uncompleted data.
\hypertarget{sample-based-completions}{% \hypertarget{sample-based-completions}{%
\section{Sample based completions}\label{sample-based-completions}} \subsubsection{Sample based
completions}\label{sample-based-completions}}
The \(M\) matrix is used to sample a new \(X\) matrix which elements are The \(M\) matrix is used to sample a new \(X\) matrix which elements are
the realisation of Bernoulli distributions of probability \(M_{i,j}\). the realisation of Bernoulli distributions of probability \(M_{i,j}\).
\[\mathbb{P}(X_{i,j} = 1) = M_{i,j} \] \[\mathbb{P}(X_{i,j} = 1) = M_{i,j} \]
\hypertarget{references}{% \begin{longtable}[]{@{}lr@{}}
\section*{References}\label{references}} \toprule
\addcontentsline{toc}{section}{References} & ARI with uncompleted data\tabularnewline
\midrule
\hypertarget{refs}{} \endhead
\begin{cslreferences} iid & 0.0148172\tabularnewline
\leavevmode\hypertarget{ref-anakokDisentanglingStructureEcological2022}{}% pi & 0.0265793\tabularnewline
Anakok, Emre, Pierre Barbillon, Colin Fontaine, and Elisa Thebault. rho & 0.0051536\tabularnewline
2022. ``Disentangling the Structure of Ecological Bipartite Networks pirho & 0.0152299\tabularnewline
from Observation Processes.'' arXiv. \bottomrule
\url{http://arxiv.org/abs/2211.16364}. \end{longtable}
\end{cslreferences}
\end{document}

View file

@ -0,0 +1,2 @@
# Application to \cite{doreRelativeEffectsAnthropogenic2021} data
\label{sec:application-to-dorerelativeeffectsanthropogenic2021-data}

View file

@ -0,0 +1,6 @@
\hypertarget{application-to-data}{%
\section{\texorpdfstring{Application to
\cite{doreRelativeEffectsAnthropogenic2021}
data}{Application to data}}\label{application-to-data}}
\label{sec:application-to-dorerelativeeffectsanthropogenic2021-data}

View file

@ -18,8 +18,8 @@ options(knitr.table.format = function() {
```{r require_lib, echo = FALSE, include=FALSE, warning=FALSE} ```{r require_lib, echo = FALSE, include=FALSE, warning=FALSE}
require("tidyverse") require("tidyverse")
require("knitr") require("knitr")
devtools::load_all("R/") require("colSBM")
source("real_data/temporary_plot.R") source("temporary_plot.R")
``` ```
```{r pretty_matrix_print, echo = FALSE, warning=FALSE} ```{r pretty_matrix_print, echo = FALSE, warning=FALSE}
@ -98,7 +98,7 @@ alpha_print <- function(unlisted_partition) {
``` ```
```{r taxonomy_functions, echo = FALSE, warning=FALSE} ```{r taxonomy_functions, echo = FALSE, warning=FALSE}
interaction_data <- read.table(file = "real_data/data/interaction-data.txt", sep = "\t", header = TRUE) interaction_data <- read.table(file = "data/interaction-data.txt", sep = "\t", header = TRUE)
insect_orders <- unique(interaction_data$insectorder) insect_orders <- unique(interaction_data$insectorder)
plant_family <- unique(interaction_data$plantorder) plant_family <- unique(interaction_data$plantorder)
@ -243,13 +243,13 @@ taxonomy_plot <- function(data, insects_or_plants, model, stack_or_fill) {
```{r load data, echo = FALSE, include = FALSE, warning=FALSE} ```{r load data, echo = FALSE, include = FALSE, warning=FALSE}
# All results # All results
iid_unlist <- extract_unlist_reorder("real_data/data/dore_collection_clustering_nb_run1_iid_123networks_24-05-23-21:40:42.Rds") iid_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_iid_123networks_24-05-23-21:40:42.Rds")
rho_unlist <- extract_unlist_reorder("real_data/data/dore_collection_clustering_nb_run1_rho_123networks_25-05-23-13:58:30.Rds") rho_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_rho_123networks_25-05-23-13:58:30.Rds")
pi_unlist <- extract_unlist_reorder("real_data/data/dore_collection_clustering_nb_run1_pi_123networks_25-05-23-17:31:25.Rds") pi_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_pi_123networks_25-05-23-17:31:25.Rds")
pirho_unlist <- extract_unlist_reorder("real_data/data/dore_collection_clustering_nb_run1_pirho_123networks_26-05-23-19:22:55.Rds") pirho_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_pirho_123networks_26-05-23-19:22:55.Rds")
``` ```
## Clustering avec le modèle iid ## Clustering avec le modèle iid
@ -267,11 +267,11 @@ alpha_print(iid_unlist)
``` ```
### Comparaison avec des infos supplémentaires ### Comparaison avec des infos supplémentaires
```{r supinfo, echo = FALSE} ```{r supinfo, echo = FALSE}
supinfo <- readxl::read_xlsx("real_data/data/supinfo.xlsx", sheet = 2) supinfo <- readxl::read_xlsx("data/supinfo.xlsx", sheet = 2)
interaction_data <- read.table(file = "real_data/data/interaction-data.txt", sep = "\t", header = TRUE) interaction_data <- read.table(file = "data/interaction-data.txt", sep = "\t", header = TRUE)
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg) seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
incidence_matrices <- readRDS(file = "real_data/data/dore-matrices.Rds") incidence_matrices <- readRDS(file = "data/dore-matrices.Rds")
names_aggreg_networks <- names(incidence_matrices) names_aggreg_networks <- names(incidence_matrices)
vectorClusteringNet <- numeric(nrow(supinfo)) vectorClusteringNet <- numeric(nrow(supinfo))
for (k in 1:length(iid_unlist)) { for (k in 1:length(iid_unlist)) {

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
require("ggplot2") require("ggplot2")
filenames <- list.files( filenames <- list.files(
path = "./simulation/data/", path = "./Rcodes/simulation/data/",
pattern = "inference_testing_2023-05*", pattern = "inference_testing_2023-05*",
full.names = TRUE full.names = TRUE
) )

View file

View file

@ -1,7 +1,7 @@
require("bettermc") require("bettermc")
require("gtools") require("gtools")
require("tictoc") require("tictoc")
devtools::load_all("R/") require("colSBM")
# Network param # Network param
nr <- 120 nr <- 120
@ -16,13 +16,13 @@ base_pi1 <- c(0.2, 0.4, 0.4, 0)
rho1 <- rep(0.25, 4) rho1 <- rep(0.25, 4)
pi2 <- rep(0.25, 4) pi2 <- rep(0.25, 4)
base_rho2 <- c(0, 1/3, 1/3, 1/3) base_rho2 <- c(0, 1 / 3, 1 / 3, 1 / 3)
pi1 <- matrix(unlist(combinat::permn(base_pi1)), byrow = TRUE, ncol = 4) pi1 <- matrix(unlist(combinat::permn(base_pi1)), byrow = TRUE, ncol = 4)
pi1 <- pi1[!duplicated(pi1), ][1:4, ] pi1 <- pi1[!duplicated(pi1), ]
rho2 <- matrix(unlist(combinat::permn(base_rho2)), byrow = TRUE, ncol = 4) rho2 <- matrix(unlist(combinat::permn(base_rho2)), byrow = TRUE, ncol = 4)
rho2 <- rho2[!duplicated(rho2),] rho2 <- rho2[!duplicated(rho2), ]
repetition <- seq.int(3) repetition <- seq.int(3)
@ -30,8 +30,10 @@ conditions <- tidyr::crossing(epsilon_alpha, pi1, rho2, repetition)
# Filter conditions to prevent the same blocks from being empty # Filter conditions to prevent the same blocks from being empty
conditions <- conditions[ conditions <- conditions[
!apply(conditions$pi1[, 1:4] == 0 & conditions$rho2[, 1:4] == 0, !apply(
1, any), conditions$pi1[, 1:4] == 0 & conditions$rho2[, 1:4] == 0,
1, any
),
] ]
# To speed up computations and debug adding an argument based selection # To speed up computations and debug adding an argument based selection
@ -58,18 +60,20 @@ if (arg[2] > nrow(conditions) | arg[2] < 1) {
choosed_conditions <- seq.int(from = arg[1], to = arg[2]) choosed_conditions <- seq.int(from = arg[1], to = arg[2])
conditions <- conditions[choosed_conditions,] conditions <- conditions[choosed_conditions, ]
tic() tic()
results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) { results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
ea <- conditions[s,]$epsilon_alpha ea <- conditions[s, ]$epsilon_alpha
current_pi1 <- conditions[s, ]$pi1 current_pi1 <- conditions[s, ]$pi1
current_rho2 <- conditions[s,]$rho2 current_rho2 <- conditions[s, ]$rho2
current_alpha <- base_alpha + matrix(c( current_alpha <- base_alpha + matrix(
c(
3 * ea, 2 * ea, ea, -ea, 3 * ea, 2 * ea, ea, -ea,
2 * ea, 2 * ea, - ea, ea, 2 * ea, 2 * ea, -ea, ea,
ea, - ea, ea, 2 * ea, ea, -ea, ea, 2 * ea,
- ea, ea, 2 * ea, 0), -ea, ea, 2 * ea, 0
),
byrow = TRUE, nrow = 4, ncol = 4 byrow = TRUE, nrow = 4, ncol = 4
) )
@ -78,25 +82,25 @@ results <- bettermc::mclapply(seq_len(nrow(conditions)), function(s) {
Cpi2 <- matrix(c(rho1, current_rho2), byrow = TRUE, nrow = M) > 0 Cpi2 <- matrix(c(rho1, current_rho2), byrow = TRUE, nrow = M) > 0
netlist_generated <- list( netlist_generated <- list(
generate_bipartite_network( generate_bipartite_collection(
nr, nc, conditions[s, ]$pi1, rho1, nr, nc, conditions[s, ]$pi1, rho1,
current_alpha current_alpha, M = 1, return_memberships = TRUE
), )[[1]],
generate_bipartite_network( generate_bipartite_collection(
nr, nc, pi2, conditions[s, ]$rho2, nr, nc, pi2, conditions[s, ]$rho2,
current_alpha current_alpha, M = 1, return_memberships = TRUE
) )[[1]]
) )
netlist <- lapply(seq_along(netlist_generated), function(m) { netlist <- lapply(seq_along(netlist_generated), function(m) {
return(netlist_generated[[m]]$incidence_matrix) return(netlist_generated[[m]]$incidence_matrix)
}) })
row_clusterings <- lapply(seq_along(netlist_generated), function(m) { row_clusterings <- lapply(seq_along(netlist_generated), function(m) {
return(netlist_generated[[m]]$row_clustering) return(netlist_generated[[m]]$row_blockmemberships)
}) })
col_clusterings <- lapply(seq_along(netlist_generated), function(m) { col_clusterings <- lapply(seq_along(netlist_generated), function(m) {
return(netlist_generated[[m]]$col_clustering) return(netlist_generated[[m]]$col_blockmemberships)
}) })
full_row_clustering <- as.vector(sapply( full_row_clustering <- as.vector(sapply(

View file

@ -1,6 +1,6 @@
require("ggplot2") require("ggplot2")
filenames <- list.files( filenames <- list.files(
path = "./simulation/data/", path = "./Rcodes/simulation/data/",
pattern = "model_selection_check_batch_15mai_3_rep_", pattern = "model_selection_check_batch_15mai_3_rep_",
full.names = TRUE full.names = TRUE
) )

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

BIN
figure/iid_meso_plot-1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

BIN
figure/iid_meso_plot-2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

BIN
figure/iid_meso_plot-3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

BIN
figure/iid_meso_plot-4.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

BIN
figure/iid_meso_plot-5.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
figure/pi_meso_plot-1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

BIN
figure/pi_meso_plot-2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

BIN
figure/rho_meso_plot-1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 232 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 866 KiB

66
presentation_UMR.tex Normal file
View file

@ -0,0 +1,66 @@
\section{Présentation}
L'UMR MIA Paris-Saclay est une entité de recherche qui regroupe des
statisticiens et des informaticiens spécialisés dans la modélisation et
l'apprentissage statistique et informatique appliqués à la biologie, l'écologie,
l'environnement, l'agronomie et l'agro-alimentaire. Elle est affiliée à
AgroParisTech, INRAE et l'Université Paris Saclay.
Les membres de cette unité possèdent des compétences variées en matière de
méthodes d'inférence statistique, telles que les modèles complexes, les modèles
à variables latentes, l'inférence bayésienne, l'apprentissage et la sélection de
modèle. Ils sont également experts en algorithmique, notamment en
généralisation, transfert de domaine et représentation des connaissances.
L'objectif de cette unité est de développer des méthodes statistiques et
informatiques originales, à la fois génériques et motivées par des
problématiques spécifiques dans le domaine des sciences du vivant. Les activités
de recherche s'appuient sur une solide culture dans les disciplines cibles,
telles que l'écologie, l'environnement, l'agro-alimentaire, la biologie
moléculaire et la biologie des systèmes.
L'unité est structurée en deux équipes de recherche : SOLsTIS (Statistical
mOdelling and Learning for environnemenT and lIfe Sciences) et EkINocs (Expert
Knowledge, INteractive modellINg and learnINg for understandINg and decisiOn
makINg in dINamic Complexe Systems).
Elle est rattachée au département MATHNUM d'INRAE et au département MMIP
d'AgroParisTech.
Les responsables au sein de l'unité sont : Julien Chiquet en tant que Directeur
d'unité, Sophie Donnet en tant que Directrice d'unité adjointe, Antoine
Cornuéjols en tant que Responsable de l'équipe EkINocs, et Sophie Donnet
et Pierre Barbillon en tant que Responsables de l'équipe SOLsTIS.
\newline
\emph{Source:~\cite{AccueilMIAParisSaclay}}\\
La figure \ref{fig:organigramme-umr} présente l'organigramme complet de l'unité.
\begin{sidewaysfigure}[h!]
\begin{center}
% \includegraphics[scale=0.4]{img/Organigramme_MIA-Paris-Saclay}
\includesvg[scale=0.6]{img/Organigramme_MIA-Paris-Saclay.svg}
\caption{Organigramme de l'UMR}
\label{fig:organigramme-umr}
\end{center}
\end{sidewaysfigure}
\section{Encadrement et vie en stage}
Au cours de mon stage, j'étais encadré par Pierre Barbillon et fréquemment en
discussion avec lui et Saint-Clair Chabert-Liddell dont j'ai poursuivi les
travaux.
Le contexte de travail, au sein des ingénieurs d'études, des doctorants, des
chercheurs et des maîtres de conférences, a été pour moi très enrichissant. Ce
stage s'inscrit dans la construction de mon parcours professionnel en validant
le désir que je présentais de faire de la recherche.
J'ai particulièrement apprécié la disponibilité de toutes les personnes de
l'unité qui n'ont jamais hésité à se rendre disponibles pour répondre à mes
questions.
Les nombreux séminaires et le désir de partage de connaissances à travers des
formations internes et de l'auto-formation m'ont vraiment plu et m'ont ouvert à
de nouvelles problématiques passionnantes.
De plus, j'ai beaucoup progressé dans les domaines abordés pendant mon
stage, et cela m'a rendu confiant dans le choix de faire le
master \emph{MathSV} pour l'année scolaire 2023-2024. Ce stage a donc été
déterminant et confirme l'orientation de mon parcours professionnel.

Binary file not shown.

View file

@ -1,5 +1,4 @@
\documentclass[12pt,a4paper]{report} \documentclass[12pt,a4paper]{report}
%====En-tête==== %====En-tête====
% Ajout des packages % Ajout des packages
\usepackage[english]{babel} % pour dire que le texte est en francais \usepackage[english]{babel} % pour dire que le texte est en francais
@ -18,11 +17,18 @@
\usepackage{tikz} % For graph plots \usepackage{tikz} % For graph plots
\usepackage{caption} % Figures \usepackage{caption} % Figures
\usepackage{subcaption} % And Subfigures \usepackage{subcaption} % And Subfigures
\usepackage{longtable}
\usepackage{rotating} % For allowing to rotate figures
\usepackage{svg} % To allow svg inclusions
%% Bibliography %% Bibliography
\usepackage[style=apa,citestyle=authoryear-comp]{biblatex} \usepackage[style=apa,citestyle=authoryear-comp]{biblatex}
\addbibresource{references.bib} \addbibresource{references.bib}
%% For good md to tex conversion
\providecommand{\tightlist}{%
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
\usepackage{booktabs}
%% Tikz Related %% Tikz Related
\usetikzlibrary{calc,shapes,backgrounds,arrows,automata,shadows,positioning} \usetikzlibrary{calc,shapes,backgrounds,arrows,automata,shadows,positioning}
@ -62,14 +68,26 @@
% titre et auteur % titre et auteur
\title{Rapport de stage dans l'UMR MIA Paris-Saclay} \title{Rapport de stage dans l'UMR MIA Paris-Saclay}
\author{Louis Lacoste} \author{Louis Lacoste}
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\begin{document} \begin{document}
\maketitle \maketitle
\tableofcontents \tableofcontents
\chapter{Présentation de l'UMR} \section{Remerciements}
\chapter{Context} Je tiens à remercier en premier lieu Pierre Barbillon pour son encadrement
remarquable, sa disponibilité, ses conseils avisés et sa gentillesse.
Je remercie également Saint-Clair Chabert-Liddell pour son accompagnement, ses
remarques, ses explications et le temps qu'il m'a consacré. Merci à Sophie
Donnet pour les cours et les idées qu'elle m'a donnés.
% TODO Compléter les remerciements
\chapter{L'UMR MIA Paris-Saclay}
\include{presentation_UMR}
\chapter{Context of the study}
\section{Usage and importance of bipartite graphs} \section{Usage and importance of bipartite graphs}
\label{sec:usage-and-importance-of-bipartite-graphs} \label{sec:usage-and-importance-of-bipartite-graphs}
@ -253,6 +271,7 @@ This model supposes that:
\end{figure} \end{figure}
Parameters Parameters
% TODO fix parameters according to presentation
\begin{itemize} \begin{itemize}
\item $Q_1 = \{{\color{blueind}\bullet},{\color{cyanind}\bullet},{\color{electricblue}\bullet}\}$ blocks in rows \item $Q_1 = \{{\color{blueind}\bullet},{\color{cyanind}\bullet},{\color{electricblue}\bullet}\}$ blocks in rows
\item $Q_2 = \{{\color{burntorange}\bullet},{\color{goldenyellow}\bullet},{\color{yellow}\bullet}\}$ blocks in columns \item $Q_2 = \{{\color{burntorange}\bullet},{\color{goldenyellow}\bullet},{\color{yellow}\bullet}\}$ blocks in columns
@ -433,7 +452,7 @@ the column dimension.
For a given number of blocks $Q_1$, $Q_2$ and matrix $S^2$ ($S^1$ being in this case the matrix full of ones), the number of For a given number of blocks $Q_1$, $Q_2$ and matrix $S^2$ ($S^1$ being in this case the matrix full of ones), the number of
parameters is: parameters is:
\begin{equation*} \begin{equation*}
\text{NP}(\pi\text{-}colBiSBM) = (Q_1 - 1) + \sum_{m=1}^{M}\Bigg( \sum_{r=1}^{Q_2} S^2_{mr} - 1 \Bigg) + \sum_{\substack{q=1,\dots,Q_1 \\ r=1,\dots,Q_2}} \mathbb{1}_{{(S^{1\prime}S^2)}_{qr}>0} \text{NP}(\rho\text{-}colBiSBM) = (Q_1 - 1) + \sum_{m=1}^{M}\Bigg( \sum_{r=1}^{Q_2} S^2_{mr} - 1 \Bigg) + \sum_{\substack{q=1,\dots,Q_1 \\ r=1,\dots,Q_2}} \mathbb{1}_{{(S^{1\prime}S^2)}_{qr}>0}
\end{equation*} \end{equation*}
$\pi\rho$-colBiSBM model still assumes that the networks share a common connectivity $\pi\rho$-colBiSBM model still assumes that the networks share a common connectivity
@ -451,7 +470,7 @@ $\rho^m_r \in \left[ 0,1 \right], \sum_{r=1}^{Q_2} \rho^m_r = 1 $.
For a given number of blocks $Q_1$, $Q_2$ and matrices $S^1$, $S^2$, the number of For a given number of blocks $Q_1$, $Q_2$ and matrices $S^1$, $S^2$, the number of
parameters is: parameters is:
\begin{equation*} \begin{equation*}
\text{NP}(\pi\text{-}colBiSBM) = \sum_{m=1}^{M}\Bigg( \sum_{q=1}^{Q_1} S^1_{mq} - 1 \Bigg) + \sum_{m=1}^{M}\Bigg( \sum_{r=1}^{Q_2} S^2_{mr} - 1 \Bigg) + \sum_{\substack{q=1,\dots,Q_1 \\ r=1,\dots,Q_2}} \mathbb{1}_{{(S^{1\prime}S^2)}_{qr}>0} \text{NP}(\pi\rho\text{-}colBiSBM) = \sum_{m=1}^{M}\Bigg( \sum_{q=1}^{Q_1} S^1_{mq} - 1 \Bigg) + \sum_{m=1}^{M}\Bigg( \sum_{r=1}^{Q_2} S^2_{mr} - 1 \Bigg) + \sum_{\substack{q=1,\dots,Q_1 \\ r=1,\dots,Q_2}} \mathbb{1}_{{(S^{1\prime}S^2)}_{qr}>0}
\end{equation*} \end{equation*}
@ -463,34 +482,67 @@ we use a variatonal version of the Expectation Maximization (VEM) algorithm.
We maximize a variational lower bound of the log-likelihood of the observed data We maximize a variational lower bound of the log-likelihood of the observed data
by approximating $p(\bm{Z,W}|\bm{X};\bm{\theta})$ with a distribution on $\bm{Z}$ by approximating $p(\bm{Z,W}|\bm{X};\bm{\theta})$ with a distribution on $\bm{Z}$
and $\bm{W}$ named $\mathcal{R}$ issued from a family of factorizable distribution and $\bm{W}$ named $\mathcal{R}$ defined as
\parencite{daudinMixtureModelRandom2008}: $\mathcal{R} = \bigotimes_{m=1}^M \mathcal{R}_m$.\\
\[ The lower bound is defined as:
\mathcal{J}(\mathcal{R};\bm{\theta}) \coloneqq \mathbb{E}_{\mathcal{R}}[\ell(\bm{X},\bm{Z},\bm{W};\bm{\theta})] + \mathcal{H}(\bm{Z,W}) \leq \ell(\bm{X};\bm{\theta}) \begin{equation*}
\] \mathcal{J}(\mathcal{R};\bm{\theta}) \coloneqq \sum_{m=1}^{M} \bigg( \mathbb{E}_{\mathcal{R}_m}[\ell(X^m,Z^m,W^m;\bm{\theta})] + \mathcal{H}(\mathcal{R}_m) \bigg) \leq \ell(\bm{X};\bm{\theta})
$\mathcal{H}$ is the entropy of the distribution. $\bm{Z}$ and $\bm{W}$ are \end{equation*}
$\bm{Z}$ and $\bm{W}$ are
redefined using the \emph{one-hot encoded} conversion (i.e., $Z_i^m = q redefined using the \emph{one-hot encoded} conversion (i.e., $Z_i^m = q
\rightarrow Z_{iq}^m = 1$ and $W_j^m = r \rightarrow W_{jr}^m = 1$) \rightarrow Z_{iq}^m = 1$ and $W_j^m = r \rightarrow W_{jr}^m = 1$).\\ % W_{jr\prime}^m pour r != r égal 0
We define $\tau_{iq}^{1,m} = \mathbb{P}_{\mathcal{R}}(Z_{iq}^m = 1|X_{ij}^m)$
and $\tau_{jr}^{2,m} = \mathbb{P}_{\mathcal{R}}(W_{jr}^m = 1|X_{ij}^m)$ and the When $\mathcal{R}_m$ is issued from the set of the factorizable distributions,
variational approximation is we denote
$\mathbb{P}_{\mathcal{R}} (Z_{iq}^m = 1, W_{jr}^m = 1|X_{ij}^m) = $\tau_{iq}^{1,m} = \mathbb{P}_{\mathcal{R}_m}(Z_{iq}^m = 1|X_{ij}^m)$
\mathbb{P}_{\mathcal{R}}(Z_{iq}^m = 1|X_{ij}^m) {\color{red}\times} \mathbb{P}_{\mathcal{R}}(W_{jr}^m = 1|X_{ij}^m) = \tau_{iq}^{1,m} {\color{red}\times} \tau_{jr}^{2,m}$. and $\tau_{jr}^{2,m} = \mathbb{P}_{\mathcal{R}_m}(W_{jr}^m = 1|X_{ij}^m)$, thus
we have:
$\mathbb{P}_{\mathcal{R}_m} (Z_{iq}^m = 1, W_{jr}^m = 1|X_{ij}^m) =
\mathbb{P}_{\mathcal{R}_m}(Z_{iq}^m = 1|X_{ij}^m) {\color{red}\times} \mathbb{P}_{\mathcal{R}_m}(W_{jr}^m = 1|X_{ij}^m) = \tau_{iq}^{1,m} {\color{red}\times} \tau_{jr}^{2,m}$.
The formula for the entropy per network is thus:
\begin{equation*}
\mathcal{H}(\mathcal{R}_m) = - \sum_{i=1}^{n_1^m} \sum_{q \in \mathcal{Q}_{1,m}} \tau^{1,m}_{i,q} \log \tau^{1,m}_{i,q} - \sum_{j=1}^{n_2^m} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{2,m}_{j,r} \log \tau^{2,m}_{j,r}
\end{equation*}
And the expectation of the completed log-likelihood under the $\mathcal{R}_m$ variational distribution for network $m$ is:
\begin{align*}
\mathbb{E}_{\mathcal{R}_m}[\ell(X^m,Z^m,W^m;\bm{\theta})] = \sum_{i = 1}^{n_1^m}\sum_{j=1}^{n_2^m}\sum_{q \in \mathcal{Q}_{1,m}} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{1,m}_{i,q} \tau^{2,m}_{j,r} \log f(X^{m}_{ij}; \alpha_{qr}) \\
+ \sum_{i=1}^{n_1^m} \sum_{q \in \mathcal{Q}_{1,m}} \tau^{1,m}_{i,q} \log \pi_{\color{black}q}^{\color{gray}m} + \sum_{j=1}^{n_2^m} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{2,m}_{j,r} \log \rho_{\color{black}r}^{\color{gray}m}
\end{align*}
And thus the lower bound becomes:
\begin{align*}
\mathcal{J}(\bm{\tau};\bm{\theta}) \coloneqq \sum_{m=1}^{M} \bigg(\sum_{i = 1}^{n_1^m}\sum_{j=1}^{n_2^m}\sum_{q \in \mathcal{Q}_{1,m}} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{1,m}_{i,q} \tau^{2,m}_{j,r} \log f(X^{m}_{ij}; \alpha_{qr}) \\
+ \sum_{i=1}^{n_1^m} \sum_{q \in \mathcal{Q}_{1,m}} \tau^{1,m}_{i,q} \log \pi_{\color{black}q}^{\color{gray}m} + \sum_{j=1}^{n_2^m} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{2,m}_{j,r} \log \rho_{\color{black}r}^{\color{gray}m} \\
- \sum_{i=1}^{n_1^m} \sum_{q \in \mathcal{Q}_{1,m}} \tau^{1,m}_{i,q} \log \tau^{1,m}_{i,q} - \sum_{j=1}^{n_2^m} \sum_{r \in \mathcal{Q}_{2,m}} \tau^{2,m}_{j,r} \log \tau^{2,m}_{j,r} \bigg) \color{black}
\end{align*}
where we identify the variational distribution $\mathcal{R}$ with its parameter
$\bm{\tau}$. \\
% \begin{equation*}
% \mathcal{J}(\mathcal{R};\bm{\theta}) \coloneqq \mathbb{E}_{\mathcal{R}}[\ell(\bm{X},\bm{Z},\bm{W};\bm{\theta})] + \mathcal{H}(\bm{Z,W}) \leq \ell(\bm{X};\bm{\theta})
% \end{equation*}
% TODO Develop the formula % TODO Develop the formula
The VEM algorithm alternates between two steps, the variational E step and the M step. The VEM algorithm alternates between two steps, the variational E step and the M step.
The E step consists in optimizing $\mathcal{J}(\mathcal{R};\bm{\theta})$ for a The E step consists in optimizing $\mathcal{J}(\bm{\tau};\bm{\theta})$ for a
current value of $\bm{\theta}$ with respect to $\mathcal{R}$. And the M step current value of $\bm{\theta}$ with respect to $\bm{\tau}$. And the M step
consists of maximizing $\mathcal{J}(\mathcal{R};\bm{\theta})$ with respect to consists of maximizing $\mathcal{J}(\bm{\tau};\bm{\theta})$ with respect to
$\bm{\theta}$ and for a given variational distribution $\mathcal{R}$. $\bm{\theta}$ and for a given variational distribution $\bm{\tau}$.
\subsection{Variational E step} \subsection{Variational E step}
\label{ssec:variational-e-step} \label{ssec:variational-e-step}
At this step we maximize with respect to $\bm{\tau}$: At this step we maximize with respect to the variational distribution $\bm{\tau}$:
$$\widehat{\bm{\tau}}^{(t+1)} = \arg \max_{\bm{\tau}} \mathcal{J}(\mathcal{\bm{\tau}},\bm{\widehat{\theta}}^{(t)})$$ $$\widehat{\bm{\tau}}^{(t+1)} = \arg \max_{\bm{\tau}} \mathcal{J}(\mathcal{\bm{\tau}},\bm{\widehat{\theta}}^{(t)}).$$
And we obtain the following formulae for the $\bm{\tau^m}$: And we obtain the following formulae for the $\bm{\tau^m}$:
@ -498,10 +550,11 @@ And we obtain the following formulae for the $\bm{\tau^m}$:
\widehat{\tau}_{iq}^{1,m} \propto \widehat{\pi}_{q}^{m(t)} \prod_{j=1}^{n_2^m}\prod_{r\in\mathcal{Q}_2^m} f(X_{ij}^m;\widehat{\alpha}_{qr}^{(t)})^{\widehat{\tau}_{jr}^{2,m(t+1)}} & \forall i = 1, \dots , n_1^m, q \in \mathcal{Q}_1^m \\ \widehat{\tau}_{iq}^{1,m} \propto \widehat{\pi}_{q}^{m(t)} \prod_{j=1}^{n_2^m}\prod_{r\in\mathcal{Q}_2^m} f(X_{ij}^m;\widehat{\alpha}_{qr}^{(t)})^{\widehat{\tau}_{jr}^{2,m(t+1)}} & \forall i = 1, \dots , n_1^m, q \in \mathcal{Q}_1^m \\
\widehat{\tau}_{jr}^{2,m} \propto \widehat{\rho}_{r}^{m(t)} \prod_{i=1}^{n_1^m}\prod_{q\in\mathcal{Q}_1^m} f(X_{ij}^m;\widehat{\alpha}_{qr}^{(t)})^{\widehat{\tau}_{iq}^{1,m(t+1)}} & \forall j = 1, \dots , n_2^m, r \in \mathcal{Q}_2^m \widehat{\tau}_{jr}^{2,m} \propto \widehat{\rho}_{r}^{m(t)} \prod_{i=1}^{n_1^m}\prod_{q\in\mathcal{Q}_1^m} f(X_{ij}^m;\widehat{\alpha}_{qr}^{(t)})^{\widehat{\tau}_{iq}^{1,m(t+1)}} & \forall j = 1, \dots , n_2^m, r \in \mathcal{Q}_2^m
\end{align*} \end{align*}
which are used to update iteratively the values by a fixed point algorithm with
only one step.
From the above formulae we obtain for the Bernoulli distribution:
% TODO move to technical.tex % TODO move to technical.tex
% From the above formulae we obtain for the Bernoulli distribution:
% \begin{itemize} % \begin{itemize}
% \item[-] \textit{iid} : % \item[-] \textit{iid} :
% \[ \bm{\tau}^{m,1} = ~^{t}\pi + \exp((\text{Mask}^{m} \odot A^{m}) % \[ \bm{\tau}^{m,1} = ~^{t}\pi + \exp((\text{Mask}^{m} \odot A^{m})
@ -589,7 +642,7 @@ We obtain the following expression
\] \]
with pen the penalties.\\ with pen the penalties.\\
Using the formula $\mathbb{E}_{\widehat{\mathcal{R}}} [\ell(\bm{X,Z,W;\theta})] \approx \ell (\bm{X;\theta}) - \mathcal{H(\widehat{R})}$, Using the formula $\mathbb{E}_{\widehat{\mathcal{R}}} [\ell(\bm{X,Z,W;\theta})] \approx \ell (\bm{X;\theta}) - \mathcal{H(\widehat{R})}$,
it becomes evident, as highlighted in the existing literature, that the it becomes clearer, as highlighted in the existing literature, that the
Integrated Classified Likelihood (ICL) gives preference to well-separated blocks Integrated Classified Likelihood (ICL) gives preference to well-separated blocks
by imposing a penalty on the entropy of node grouping. However, the objective of by imposing a penalty on the entropy of node grouping. However, the objective of
our study extends beyond grouping nodes into coherent blocks. We also aim to our study extends beyond grouping nodes into coherent blocks. We also aim to
@ -994,7 +1047,7 @@ networks and then compute the dissimilarity matrix between all networks of the
collection. We obtain the collection $\mathcal{G} = \{\mathcal{M}\}$ the trivial collection. We obtain the collection $\mathcal{G} = \{\mathcal{M}\}$ the trivial
partition in a unique group. partition in a unique group.
Then using the \emph{KNN} we split the collection in two sub-collections with Then using the \emph{Kmeans} we split the collection in two sub-collections with
the dissimilarity matrix. The two sub-collections are fitted and we compute the dissimilarity matrix. The two sub-collections are fitted and we compute
the score of this new partition $\mathcal{G}^{*} = \{G_1, G_2\}$. the score of this new partition $\mathcal{G}^{*} = \{G_1, G_2\}$.
@ -1002,12 +1055,17 @@ If $Sc(\mathcal{G}^{*}) > Sc(\mathcal{G})$ then we repeat the same procedure on
$G_1$ and $G_2$. Else we return $\mathcal{G}$. $G_1$ and $G_2$. Else we return $\mathcal{G}$.
We illustrate our capacity to perform a partition of a collection for all We illustrate our capacity to perform a partition of a collection for all
colBiSBM models in \ref{ssec:network-clustering-of-simulated-networks}. colBiSBM models in \ref{sec:network-clustering-of-simulated-networks}.
\section{Simulation studies}\label{sec:simulation-studies} \chapter{Simulation studies}\label{chap:simulation-studies}
\subsection{Network clustering of simulated networks}\label{ssec:network-clustering-of-simulated-networks} \section{Network clustering of simulated networks}\label{sec:network-clustering-of-simulated-networks}
\section{Application to~\cite{doreRelativeEffectsAnthropogenic2021} data}\label{sec:application-to-dorerelativeeffectsanthropogenic2021-data} \chapter{Applications}
\include{Rcodes/real_data/application_dore_data}
% \include{Rcodes/real_data/presentation_dore}
\subsection{Completing raw data using CoOPLBM \parencite{anakokDisentanglingStructureEcological2022}}
\include{Rcodes/real_data/CoOPLBM_completion_analyze}
\printbibliography \printbibliography
\listoffigures \listoffigures

View file

@ -1,50 +1,56 @@
@misc{anakokDisentanglingStructureEcological2022, @online{AccueilMIAParisSaclay,
title = {Accueil | {{MIA Paris-Saclay}}},
url = {https://mia-ps.inrae.fr/},
urldate = {2023-07-03},
file = {/home/polarolouis/Zotero/storage/I7FWTZC3/mia-ps.inrae.fr.html}
}
@online{anakokDisentanglingStructureEcological2022,
title = {Disentangling the Structure of Ecological Bipartite Networks from Observation Processes}, title = {Disentangling the Structure of Ecological Bipartite Networks from Observation Processes},
author = {Anakok, Emre and Barbillon, Pierre and Fontaine, Colin and Thebault, Elisa}, author = {Anakok, Emre and Barbillon, Pierre and Fontaine, Colin and Thebault, Elisa},
year = {2022}, date = {2022-11-29},
month = nov,
number = {arXiv:2211.16364},
eprint = {2211.16364}, eprint = {2211.16364},
primaryclass = {stat}, eprinttype = {arxiv},
publisher = {{arXiv}}, eprintclass = {stat},
url = {http://arxiv.org/abs/2211.16364},
urldate = {2023-06-14}, urldate = {2023-06-14},
abstract = {The structure of a bipartite interaction network can be described by providing a clustering for each of the two types of nodes. Such clusterings are outputted by fitting a Latent Block Model (LBM) on an observed network that comes from a sampling of species interactions in the field. However, the sampling is limited and possibly uneven. This may jeopardize the fit of the LBM and then the description of the structure of the network by detecting structures which result from the sampling and not from actual underlying ecological phenomena. If the observed interaction network consists of a weighted bipartite network where the number of observed interactions between two species is available, the sampling efforts for all species can be estimated and used to correct the LBM fit. We propose to combine an observation model that accounts for sampling and an LBM for describing the structure of underlying possible ecological interactions. We develop an original inference procedure for this model, the efficiency of which is demonstrated in simulation studies. The practical interest in ecology of our model is highlighted on a large dataset of plant-pollinator network.}, abstract = {The structure of a bipartite interaction network can be described by providing a clustering for each of the two types of nodes. Such clusterings are outputted by fitting a Latent Block Model (LBM) on an observed network that comes from a sampling of species interactions in the field. However, the sampling is limited and possibly uneven. This may jeopardize the fit of the LBM and then the description of the structure of the network by detecting structures which result from the sampling and not from actual underlying ecological phenomena. If the observed interaction network consists of a weighted bipartite network where the number of observed interactions between two species is available, the sampling efforts for all species can be estimated and used to correct the LBM fit. 
We propose to combine an observation model that accounts for sampling and an LBM for describing the structure of underlying possible ecological interactions. We develop an original inference procedure for this model, the efficiency of which is demonstrated in simulation studies. The practical interest in ecology of our model is highlighted on a large dataset of plant-pollinator network.},
archiveprefix = {arxiv},
langid = {english}, langid = {english},
pubstate = {preprint},
keywords = {Statistics - Methodology}, keywords = {Statistics - Methodology},
file = {/home/polarolouis/Zotero/storage/LQ3FINZG/Anakok et al. - 2022 - Disentangling the structure of ecological bipartit.pdf} file = {/home/polarolouis/Zotero/storage/LQ3FINZG/Anakok et al. - 2022 - Disentangling the structure of ecological bipartit.pdf}
} }
@article{aubertModelbasedBiclusteringOverdispersed2021, @article{aubertModelbasedBiclusteringOverdispersed2021,
title = {Model-Based Biclustering for Overdispersed Count Data with Application in Microbial Ecology}, title = {Model-Based Biclustering for Overdispersed Count Data with Application in Microbial Ecology},
author = {Aubert, Julie and Schbath, Sophie and Robin, St{\'e}phane}, author = {Aubert, Julie and Schbath, Sophie and Robin, Stéphane},
year = {2021}, date = {2021},
journal = {Methods in Ecology and Evolution}, journaltitle = {Methods in Ecology and Evolution},
volume = {12}, volume = {12},
number = {6}, number = {6},
pages = {1050--1061}, pages = {1050--1061},
issn = {2041-210X}, issn = {2041-210X},
doi = {10.1111/2041-210X.13582}, doi = {10.1111/2041-210X.13582},
url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/2041-210X.13582},
urldate = {2023-06-22}, urldate = {2023-06-22},
abstract = {Different studies have shown that microbial communities living in animals (humans included), in or around plants have a significant impact on health and disease of their host and on various services, such as adaptation under stressing environment. The basic input data to study microbiomes is a matrix representing abundance data of micro-organisms across different sampling units. Such a matrix typically corresponds to taxonomic profiles derived from the high-throughput sequencing of environmental samples. Biclustering is one way to study the interactions between the structure of micro-organism communities and the environmental samples they come from. We propose a latent block model (LBM) and an associated inference procedure for the biclustering of rows and columns of abundance matrices. The LBM assumes that micro-organisms (rows) and environmental samples (columns) can both be clustered into groups characterizing preferential interaction or avoidance. We use the Poisson\textendash Gamma distribution to model the overdispersion observed in microbial abundance data and introduce row and column effects to account for the sequencing effort in each sample and the mean abundance of each micro-organism. Because the latent variables are not independent conditionally on the observed ones, classical maximum likelihood inference is intractable. We then derive a variational-based inference algorithm and propose a strategy to select the number of biclusters. We illustrate the flexibility and performance of our approach both on a simulation study and on three ecological datasets. The model-based framework allows us to adapt to peculiarities of microbial ecological abundance data and allows us to explore relationships between entities of two different natures. 
We implemented our method in the cobiclust R package available on the CRAN and built a website with example of usage (https://julieaubert.github.io/cobiclust/cobiclust-example1.html).}, abstract = {Different studies have shown that microbial communities living in animals (humans included), in or around plants have a significant impact on health and disease of their host and on various services, such as adaptation under stressing environment. The basic input data to study microbiomes is a matrix representing abundance data of micro-organisms across different sampling units. Such a matrix typically corresponds to taxonomic profiles derived from the high-throughput sequencing of environmental samples. Biclustering is one way to study the interactions between the structure of micro-organism communities and the environmental samples they come from. We propose a latent block model (LBM) and an associated inference procedure for the biclustering of rows and columns of abundance matrices. The LBM assumes that micro-organisms (rows) and environmental samples (columns) can both be clustered into groups characterizing preferential interaction or avoidance. We use the PoissonGamma distribution to model the overdispersion observed in microbial abundance data and introduce row and column effects to account for the sequencing effort in each sample and the mean abundance of each micro-organism. Because the latent variables are not independent conditionally on the observed ones, classical maximum likelihood inference is intractable. We then derive a variational-based inference algorithm and propose a strategy to select the number of biclusters. We illustrate the flexibility and performance of our approach both on a simulation study and on three ecological datasets. The model-based framework allows us to adapt to peculiarities of microbial ecological abundance data and allows us to explore relationships between entities of two different natures. 
We implemented our method in the cobiclust R package available on the CRAN and built a website with example of usage (https://julieaubert.github.io/cobiclust/cobiclust-example1.html).},
copyright = {\textcopyright{} 2021 British Ecological Society},
langid = {english}, langid = {english},
keywords = {count data,latent block model,metabarcoding,microbial interactions,model-based biclustering,Poisson\textendash Gamma distribution,variational EM algorithm}, keywords = {count data,latent block model,metabarcoding,microbial interactions,model-based biclustering,Poisson–Gamma distribution,variational EM algorithm},
file = {/home/polarolouis/Zotero/storage/A4V9MJAF/Aubert et al. - 2021 - Model-based biclustering for overdispersed count d.pdf} file = {/home/polarolouis/Zotero/storage/A4V9MJAF/Aubert et al. - 2021 - Model-based biclustering for overdispersed count d.pdf}
} }
@article{biernackiAssessingMixtureModel2000, @article{biernackiAssessingMixtureModel2000,
title = {Assessing a Mixture Model for Clustering with the Integrated Completed Likelihood}, title = {Assessing a Mixture Model for Clustering with the Integrated Completed Likelihood},
author = {Biernacki, C. and Celeux, G. and Govaert, G.}, author = {Biernacki, C. and Celeux, G. and Govaert, G.},
year = {2000}, date = {2000-07},
month = jul, journaltitle = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
volume = {22}, volume = {22},
number = {7}, number = {7},
pages = {719--725}, pages = {719--725},
issn = {1939-3539}, issn = {1939-3539},
doi = {10.1109/34.865189}, doi = {10.1109/34.865189},
abstract = {We propose an assessing method of mixture model in a cluster analysis setting with integrated completed likelihood. For this purpose, the observed data are assigned to unknown clusters using a maximum a posteriori operator. Then, the integrated completed likelihood (ICL) is approximated using the Bayesian information criterion (BIC). Numerical experiments on simulated and real data of the resulting ICL criterion show that it performs well both for choosing a mixture model and a relevant number of clusters. In particular, ICL appears to be more robust than BIC to violation of some of the mixture model assumptions and it can select a number of dusters leading to a sensible partitioning of the data.}, abstract = {We propose an assessing method of mixture model in a cluster analysis setting with integrated completed likelihood. For this purpose, the observed data are assigned to unknown clusters using a maximum a posteriori operator. Then, the integrated completed likelihood (ICL) is approximated using the Bayesian information criterion (BIC). Numerical experiments on simulated and real data of the resulting ICL criterion show that it performs well both for choosing a mixture model and a relevant number of clusters. In particular, ICL appears to be more robust than BIC to violation of some of the mixture model assumptions and it can select a number of dusters leading to a sensible partitioning of the data.},
eventtitle = {{{IEEE Transactions}} on {{Pattern Analysis}} and {{Machine Intelligence}}},
keywords = {Bayesian methods,Context modeling,Gaussian distribution,Numerical simulation,Probability distribution,Robustness}, keywords = {Bayesian methods,Context modeling,Gaussian distribution,Numerical simulation,Probability distribution,Robustness},
file = {/home/polarolouis/Zotero/storage/MK9H446U/Biernacki et al. - 2000 - Assessing a mixture model for clustering with the .pdf} file = {/home/polarolouis/Zotero/storage/MK9H446U/Biernacki et al. - 2000 - Assessing a mixture model for clustering with the .pdf}
} }
@ -52,35 +58,34 @@
@article{celisseConsistencyMaximumlikelihoodVariational2012, @article{celisseConsistencyMaximumlikelihoodVariational2012,
title = {Consistency of Maximum-Likelihood and Variational Estimators in the Stochastic Block Model}, title = {Consistency of Maximum-Likelihood and Variational Estimators in the Stochastic Block Model},
author = {Celisse, Alain and Daudin, Jean-Jacques and Pierre, Laurent}, author = {Celisse, Alain and Daudin, Jean-Jacques and Pierre, Laurent},
year = {2012}, date = {2012-01},
month = jan, journaltitle = {Electronic Journal of Statistics},
journal = {Electronic Journal of Statistics},
volume = {6}, volume = {6},
number = {none},
pages = {1847--1899}, pages = {1847--1899},
publisher = {{Institute of Mathematical Statistics and Bernoulli Society}}, publisher = {{Institute of Mathematical Statistics and Bernoulli Society}},
issn = {1935-7524, 1935-7524}, issn = {1935-7524, 1935-7524},
doi = {10.1214/12-EJS729}, doi = {10.1214/12-EJS729},
url = {https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-6/issue-none/Consistency-of-maximum-likelihood-and-variational-estimators-in-the-stochastic/10.1214/12-EJS729.full},
urldate = {2023-06-06}, urldate = {2023-06-06},
abstract = {The stochastic block model (SBM) is a probabilistic model designed to describe heterogeneous directed and undirected graphs. In this paper, we address the asymptotic inference in SBM by use of maximum-likelihood and variational approaches. The identifiability of SBM is proved while asymptotic properties of maximum-likelihood and variational estimators are derived. In particular, the consistency of these estimators is settled for the probability of an edge between two vertices (and for the group proportions at the price of an additional assumption), which is to the best of our knowledge the first result of this type for variational estimators in random graphs.}, abstract = {The stochastic block model (SBM) is a probabilistic model designed to describe heterogeneous directed and undirected graphs. In this paper, we address the asymptotic inference in SBM by use of maximum-likelihood and variational approaches. The identifiability of SBM is proved while asymptotic properties of maximum-likelihood and variational estimators are derived. In particular, the consistency of these estimators is settled for the probability of an edge between two vertices (and for the group proportions at the price of an additional assumption), which is to the best of our knowledge the first result of this type for variational estimators in random graphs.},
issue = {none},
keywords = {62E17,62G05,62G20,62H30,Concentration inequalities,consistency,maximum likelihood estimators,Random graphs,Stochastic block model,variational estimators}, keywords = {62E17,62G05,62G20,62H30,Concentration inequalities,consistency,maximum likelihood estimators,Random graphs,Stochastic block model,variational estimators},
file = {/home/polarolouis/Zotero/storage/JNWRIYKG/celisse2012.pdf.pdf;/home/polarolouis/Zotero/storage/XG463B5I/Celisse et al. - 2012 - Consistency of maximum-likelihood and variational .pdf} file = {/home/polarolouis/Zotero/storage/JNWRIYKG/celisse2012.pdf.pdf;/home/polarolouis/Zotero/storage/XG463B5I/Celisse et al. - 2012 - Consistency of maximum-likelihood and variational .pdf}
} }
@misc{chabert-liddellLearningCommonStructures2023, @online{chabert-liddellLearningCommonStructures2023,
type = {Article}, type = {article},
title = {Learning Common Structures in a Collection of Networks. {{An}} Application to Food Webs}, title = {Learning Common Structures in a Collection of Networks. {{An}} Application to Food Webs},
author = {{Chabert-Liddell}, Saint-Clair and Barbillon, Pierre and Donnet, Sophie}, author = {Chabert-Liddell, Saint-Clair and Barbillon, Pierre and Donnet, Sophie},
year = {2023}, date = {2023-03-27},
month = mar,
number = {arXiv:2206.00560},
eprint = {2206.00560}, eprint = {2206.00560},
primaryclass = {stat}, eprinttype = {arxiv},
publisher = {{arXiv}}, eprintclass = {stat},
doi = {10.48550/arXiv.2206.00560}, doi = {10.48550/arXiv.2206.00560},
url = {http://arxiv.org/abs/2206.00560},
urldate = {2023-05-22}, urldate = {2023-05-22},
abstract = {Let a collection of networks represent interactions within several (social or ecological) systems. We pursue two objectives: identifying similarities in the topological structures that are held in common between the networks and clustering the collection into sub-collections of structurally homogeneous networks. We tackle these two questions with a probabilistic model based approach. We propose an extension of the Stochastic Block Model (SBM) adapted to the joint modeling of a collection of networks. The networks in the collection are assumed to be independent realizations of SBMs. The common connectivity structure is imposed through the equality of some parameters. The model parameters are estimated with a variational Expectation-Maximization (EM) algorithm. We derive an ad-hoc penalized likelihood criterion to select the number of blocks and to assess the adequacy of the consensus found between the structures of the different networks. This same criterion can also be used to cluster networks on the basis of their connectivity structure. It thus provides a partition of the collection into subsets of structurally homogeneous networks. The relevance of our proposition is assessed on two collections of ecological networks. First, an application to three stream food webs reveals the homogeneity of their structures and the correspondence between groups of species in different ecosystems playing equivalent ecological roles. Moreover, the joint analysis allows a finer analysis of the structure of smaller networks. Second, we cluster 67 food webs according to their connectivity structures and demonstrate that five mesoscale structures are sufficient to describe this collection.}, abstract = {Let a collection of networks represent interactions within several (social or ecological) systems. 
We pursue two objectives: identifying similarities in the topological structures that are held in common between the networks and clustering the collection into sub-collections of structurally homogeneous networks. We tackle these two questions with a probabilistic model based approach. We propose an extension of the Stochastic Block Model (SBM) adapted to the joint modeling of a collection of networks. The networks in the collection are assumed to be independent realizations of SBMs. The common connectivity structure is imposed through the equality of some parameters. The model parameters are estimated with a variational Expectation-Maximization (EM) algorithm. We derive an ad-hoc penalized likelihood criterion to select the number of blocks and to assess the adequacy of the consensus found between the structures of the different networks. This same criterion can also be used to cluster networks on the basis of their connectivity structure. It thus provides a partition of the collection into subsets of structurally homogeneous networks. The relevance of our proposition is assessed on two collections of ecological networks. First, an application to three stream food webs reveals the homogeneity of their structures and the correspondence between groups of species in different ecosystems playing equivalent ecological roles. Moreover, the joint analysis allows a finer analysis of the structure of smaller networks. Second, we cluster 67 food webs according to their connectivity structures and demonstrate that five mesoscale structures are sufficient to describe this collection.},
archiveprefix = {arxiv}, pubstate = {preprint},
keywords = {Statistics - Applications,Statistics - Methodology}, keywords = {Statistics - Applications,Statistics - Methodology},
file = {/home/polarolouis/Zotero/storage/M74TXGCF/Chabert-Liddell et al. - 2023 - Learning common structures in a collection of netw.pdf;/home/polarolouis/Zotero/storage/A35M8KNP/2206.html} file = {/home/polarolouis/Zotero/storage/M74TXGCF/Chabert-Liddell et al. - 2023 - Learning common structures in a collection of netw.pdf;/home/polarolouis/Zotero/storage/A35M8KNP/2206.html}
} }
@ -88,16 +93,17 @@
@article{daudinMixtureModelRandom2008, @article{daudinMixtureModelRandom2008,
title = {A Mixture Model for Random Graphs}, title = {A Mixture Model for Random Graphs},
author = {Daudin, J.-J. and Picard, F. and Robin, S.}, author = {Daudin, J.-J. and Picard, F. and Robin, S.},
year = {2008}, date = {2008-06-01},
month = jun, journaltitle = {Statistics and Computing},
journal = {Statistics and Computing}, shortjournal = {Stat Comput},
volume = {18}, volume = {18},
number = {2}, number = {2},
pages = {173--183}, pages = {173--183},
issn = {1573-1375}, issn = {1573-1375},
doi = {10.1007/s11222-007-9046-7}, doi = {10.1007/s11222-007-9046-7},
url = {https://doi.org/10.1007/s11222-007-9046-7},
urldate = {2023-06-16}, urldate = {2023-06-16},
abstract = {The Erd\"os\textendash R\'enyi model of a network is simple and possesses many explicit expressions for average and asymptotic properties, but it does not fit well to real-world networks. The vertices of those networks are often structured in unknown classes (functionally related proteins or social communities) with different connectivity properties. The stochastic block structures model was proposed for this purpose in the context of social sciences, using a Bayesian approach. We consider the same model in a frequentest statistical framework. We give the degree distribution and the clustering coefficient associated with this model, a variational method to estimate its parameters and a model selection criterion to select the number of classes. This estimation procedure allows us to deal with large networks containing thousands of vertices. The method is used to uncover the modular structure of a network of enzymatic reactions.}, abstract = {The Erdös–Rényi model of a network is simple and possesses many explicit expressions for average and asymptotic properties, but it does not fit well to real-world networks. The vertices of those networks are often structured in unknown classes (functionally related proteins or social communities) with different connectivity properties. The stochastic block structures model was proposed for this purpose in the context of social sciences, using a Bayesian approach. We consider the same model in a frequentest statistical framework. We give the degree distribution and the clustering coefficient associated with this model, a variational method to estimate its parameters and a model selection criterion to select the number of classes. This estimation procedure allows us to deal with large networks containing thousands of vertices. The method is used to uncover the modular structure of a network of enzymatic reactions.},
langid = {english}, langid = {english},
keywords = {Mixture models,Random graphs,Variational~method}, keywords = {Mixture models,Random graphs,Variational~method},
file = {/home/polarolouis/Zotero/storage/439HK27B/Daudin et al. - 2008 - A mixture model for random graphs.pdf;/home/polarolouis/Zotero/storage/HVVF5MNY/daudin2007.pdf.pdf} file = {/home/polarolouis/Zotero/storage/439HK27B/Daudin et al. - 2008 - A mixture model for random graphs.pdf;/home/polarolouis/Zotero/storage/HVVF5MNY/daudin2007.pdf.pdf}
@ -105,34 +111,35 @@
@article{desjardins-proulxEcologicalInteractionsNetflix2017, @article{desjardins-proulxEcologicalInteractionsNetflix2017,
title = {Ecological Interactions and the {{Netflix}} Problem}, title = {Ecological Interactions and the {{Netflix}} Problem},
author = {{Desjardins-Proulx}, Philippe and Laigle, Idaline and Poisot, Timoth{\'e}e and Gravel, Dominique}, author = {Desjardins-Proulx, Philippe and Laigle, Idaline and Poisot, Timothée and Gravel, Dominique},
year = {2017}, date = {2017-08-10},
month = aug, journaltitle = {PeerJ},
journal = {PeerJ}, shortjournal = {PeerJ},
volume = {5}, volume = {5},
pages = {e3644}, pages = {e3644},
publisher = {{PeerJ Inc.}}, publisher = {{PeerJ Inc.}},
issn = {2167-8359}, issn = {2167-8359},
doi = {10.7717/peerj.3644}, doi = {10.7717/peerj.3644},
url = {https://peerj.com/articles/3644},
urldate = {2023-06-15}, urldate = {2023-06-15},
abstract = {Species interactions are a key component of ecosystems but we generally have an incomplete picture of who-eats-who in a given community. Different techniques have been devised to predict species interactions using theoretical models or abundances. Here, we explore the K nearest neighbour approach, with a special emphasis on recommendation, along with a supervised machine learning technique. Recommenders are algorithms developed for companies like Netflix to predict whether a customer will like a product given the preferences of similar customers. These machine learning techniques are well-suited to study binary ecological interactions since they focus on positive-only data. By removing a prey from a predator, we find that recommenders can guess the missing prey around 50\% of the times on the first try, with up to 881 possibilities. Traits do not improve significantly the results for the K nearest neighbour, although a simple test with a supervised learning approach (random forests) show we can predict interactions with high accuracy using only three traits per species. This result shows that binary interactions can be predicted without regard to the ecological community given only three variables: body mass and two variables for the species' phylogeny. These techniques are complementary, as recommenders can predict interactions in the absence of traits, using only information about other species' interactions, while supervised learning algorithms such as random forests base their predictions on traits only but do not exploit other species' interactions. Further work should focus on developing custom similarity measures specialized for ecology to improve the KNN algorithms and using richer data to capture indirect relationships between species.}, abstract = {Species interactions are a key component of ecosystems but we generally have an incomplete picture of who-eats-who in a given community. 
Different techniques have been devised to predict species interactions using theoretical models or abundances. Here, we explore the K nearest neighbour approach, with a special emphasis on recommendation, along with a supervised machine learning technique. Recommenders are algorithms developed for companies like Netflix to predict whether a customer will like a product given the preferences of similar customers. These machine learning techniques are well-suited to study binary ecological interactions since they focus on positive-only data. By removing a prey from a predator, we find that recommenders can guess the missing prey around 50\% of the times on the first try, with up to 881 possibilities. Traits do not improve significantly the results for the K nearest neighbour, although a simple test with a supervised learning approach (random forests) show we can predict interactions with high accuracy using only three traits per species. This result shows that binary interactions can be predicted without regard to the ecological community given only three variables: body mass and two variables for the species' phylogeny. These techniques are complementary, as recommenders can predict interactions in the absence of traits, using only information about other species' interactions, while supervised learning algorithms such as random forests base their predictions on traits only but do not exploit other species' interactions. Further work should focus on developing custom similarity measures specialized for ecology to improve the KNN algorithms and using richer data to capture indirect relationships between species.},
langid = {english}, langid = {english},
file = {/home/polarolouis/Zotero/storage/3L7JALP4/Desjardins-Proulx et al. - 2017 - Ecological interactions and the Netflix problem.pdf} file = {/home/polarolouis/Zotero/storage/3L7JALP4/Desjardins-Proulx et al. - 2017 - Ecological interactions and the Netflix problem.pdf}
} }
@article{doreRelativeEffectsAnthropogenic2021, @article{doreRelativeEffectsAnthropogenic2021,
title = {Relative Effects of Anthropogenic Pressures, Climate, and Sampling Design on the Structure of Pollination Networks at the Global Scale}, title = {Relative Effects of Anthropogenic Pressures, Climate, and Sampling Design on the Structure of Pollination Networks at the Global Scale},
author = {Dor{\'e}, Ma{\"e}l and Fontaine, Colin and Th{\'e}bault, Elisa}, author = {Doré, Maël and Fontaine, Colin and Thébault, Elisa},
year = {2021}, date = {2021},
journal = {Global Change Biology}, journaltitle = {Global Change Biology},
volume = {27}, volume = {27},
number = {6}, number = {6},
pages = {1266--1280}, pages = {1266--1280},
issn = {1365-2486}, issn = {1365-2486},
doi = {10.1111/gcb.15474}, doi = {10.1111/gcb.15474},
url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/gcb.15474},
urldate = {2023-06-21}, urldate = {2023-06-21},
abstract = {Pollinators provide crucial ecosystem services that underpin to wild plant reproduction and yields of insect-pollinated crops. Understanding the relative impacts of anthropogenic pressures and climate on the structure of plant\textendash pollinator interaction networks is vital considering ongoing global change and pollinator decline. Our ability to predict the consequences of global change for pollinator assemblages worldwide requires global syntheses, but these analytical approaches may be hindered by variable methods among studies that either invalidate comparisons or mask biological phenomena. Here we conducted a synthetic analysis that assesses the relative impact of anthropogenic pressures and climatic variability, and accounts for heterogeneity in sampling methodology to reveal network responses at the global scale. We analyzed an extensive dataset, comprising 295 networks over 123 locations all over the world, and reporting over 50,000 interactions between flowering plant species and their insect visitors. Our study revealed that anthropogenic pressures correlate with an increase in generalism in pollination networks while pollinator richness and taxonomic composition are more related to climatic variables with an increase in dipteran pollinator richness associated with cooler temperatures. The contrasting response of species richness and generalism of the plant\textendash pollinator networks stresses the importance of considering interaction network structure alongside diversity in ecological monitoring. In addition, differences in sampling design explained more variation than anthropogenic pressures or climate on both pollination networks richness and generalism, highlighting the crucial need to report and incorporate sampling design in macroecological comparative studies of pollination networks. As a whole, our study reveals a potential human impact on pollination networks at a global scale. 
However, further research is needed to evaluate potential consequences of loss of specialist species and their unique ecological interactions and evolutionary pathways on the ecosystem pollination function at a global scale.}, abstract = {Pollinators provide crucial ecosystem services that underpin to wild plant reproduction and yields of insect-pollinated crops. Understanding the relative impacts of anthropogenic pressures and climate on the structure of plantpollinator interaction networks is vital considering ongoing global change and pollinator decline. Our ability to predict the consequences of global change for pollinator assemblages worldwide requires global syntheses, but these analytical approaches may be hindered by variable methods among studies that either invalidate comparisons or mask biological phenomena. Here we conducted a synthetic analysis that assesses the relative impact of anthropogenic pressures and climatic variability, and accounts for heterogeneity in sampling methodology to reveal network responses at the global scale. We analyzed an extensive dataset, comprising 295 networks over 123 locations all over the world, and reporting over 50,000 interactions between flowering plant species and their insect visitors. Our study revealed that anthropogenic pressures correlate with an increase in generalism in pollination networks while pollinator richness and taxonomic composition are more related to climatic variables with an increase in dipteran pollinator richness associated with cooler temperatures. The contrasting response of species richness and generalism of the plantpollinator networks stresses the importance of considering interaction network structure alongside diversity in ecological monitoring. 
In addition, differences in sampling design explained more variation than anthropogenic pressures or climate on both pollination networks richness and generalism, highlighting the crucial need to report and incorporate sampling design in macroecological comparative studies of pollination networks. As a whole, our study reveals a potential human impact on pollination networks at a global scale. However, further research is needed to evaluate potential consequences of loss of specialist species and their unique ecological interactions and evolutionary pathways on the ecosystem pollination function at a global scale.},
copyright = {\textcopyright{} 2020 John Wiley \& Sons Ltd},
langid = {english}, langid = {english},
keywords = {anthropogenic pressures,climate,connectance,data,generalism,human impacts,plant-pollinator,pollination networks,richness,sampling effects,specialization}, keywords = {anthropogenic pressures,climate,connectance,data,generalism,human impacts,plant-pollinator,pollination networks,richness,sampling effects,specialization},
file = {/home/polarolouis/Zotero/storage/89ZXBJQP/10.1111@gcb.15474.pdf.pdf;/home/polarolouis/Zotero/storage/IVR6RGG7/Doré et al. - 2021 - Relative effects of anthropogenic pressures, clima.pdf;/home/polarolouis/Zotero/storage/WSJ4DV98/gcb.html} file = {/home/polarolouis/Zotero/storage/89ZXBJQP/10.1111@gcb.15474.pdf.pdf;/home/polarolouis/Zotero/storage/IVR6RGG7/Doré et al. - 2021 - Relative effects of anthropogenic pressures, clima.pdf;/home/polarolouis/Zotero/storage/WSJ4DV98/gcb.html}
@ -141,31 +148,31 @@
@article{govaertEMAlgorithmBlock2005, @article{govaertEMAlgorithmBlock2005,
title = {An {{EM}} Algorithm for the Block Mixture Model}, title = {An {{EM}} Algorithm for the Block Mixture Model},
author = {Govaert, G. and Nadif, M.}, author = {Govaert, G. and Nadif, M.},
year = {2005}, date = {2005-04},
month = apr, journaltitle = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
volume = {27}, volume = {27},
number = {4}, number = {4},
pages = {643--647}, pages = {643--647},
issn = {1939-3539}, issn = {1939-3539},
doi = {10.1109/TPAMI.2005.69}, doi = {10.1109/TPAMI.2005.69},
abstract = {Although many clustering procedures aim to construct an optimal partition of objects or, sometimes, of variables, there are other methods, called block clustering methods, which consider simultaneously the two sets and organize the data into homogeneous blocks. Recently, we have proposed a new mixture model called block mixture model which takes into account this situation. This model allows one to embed simultaneous clustering of objects and variables in a mixture approach. We have studied this probabilistic model under the classification likelihood approach and developed a new algorithm for simultaneous partitioning based on the classification EM algorithm. In this paper, we consider the block clustering problem under the maximum likelihood approach and the goal of our contribution is to estimate the parameters of this model. Unfortunately, the application of the EM algorithm for the block mixture model cannot be made directly; difficulties arise due to the dependence structure in the model and approximations are required. Using a variational approximation, we propose a generalized EM algorithm to estimate the parameters of the block mixture model and, to illustrate our approach, we study the case of binary data by using a Bernoulli block mixture.}, abstract = {Although many clustering procedures aim to construct an optimal partition of objects or, sometimes, of variables, there are other methods, called block clustering methods, which consider simultaneously the two sets and organize the data into homogeneous blocks. Recently, we have proposed a new mixture model called block mixture model which takes into account this situation. This model allows one to embed simultaneous clustering of objects and variables in a mixture approach. We have studied this probabilistic model under the classification likelihood approach and developed a new algorithm for simultaneous partitioning based on the classification EM algorithm. 
In this paper, we consider the block clustering problem under the maximum likelihood approach and the goal of our contribution is to estimate the parameters of this model. Unfortunately, the application of the EM algorithm for the block mixture model cannot be made directly; difficulties arise due to the dependence structure in the model and approximations are required. Using a variational approximation, we propose a generalized EM algorithm to estimate the parameters of the block mixture model and, to illustrate our approach, we study the case of binary data by using a Bernoulli block mixture.},
eventtitle = {{{IEEE Transactions}} on {{Pattern Analysis}} and {{Machine Intelligence}}},
keywords = {Approximation algorithms,Classification algorithms,Clustering algorithms,Clustering methods,Data mining,EM algorithm,Index Terms- Block mixture model,Maximum likelihood estimation,Parameter estimation,Partitioning algorithms,Self organizing feature maps,Sparse matrices,variational approximation.}, keywords = {Approximation algorithms,Classification algorithms,Clustering algorithms,Clustering methods,Data mining,EM algorithm,Index Terms- Block mixture model,Maximum likelihood estimation,Parameter estimation,Partitioning algorithms,Self organizing feature maps,Sparse matrices,variational approximation.},
file = {/home/polarolouis/Zotero/storage/6IG45HH2/govaert2005.pdf.pdf;/home/polarolouis/Zotero/storage/TL8M3XRF/Govaert et Nadif - 2005 - An EM algorithm for the block mixture model.pdf;/home/polarolouis/Zotero/storage/2Y48IB26/1401917.html} file = {/home/polarolouis/Zotero/storage/6IG45HH2/govaert2005.pdf.pdf;/home/polarolouis/Zotero/storage/TL8M3XRF/Govaert et Nadif - 2005 - An EM algorithm for the block mixture model.pdf;/home/polarolouis/Zotero/storage/2Y48IB26/1401917.html}
} }
@article{govaertLatentBlockModel2010, @article{govaertLatentBlockModel2010,
title = {Latent {{Block Model}} for {{Contingency Table}}}, title = {Latent {{Block Model}} for {{Contingency Table}}},
author = {Govaert, G{\'e}rard and Nadif, Mohamed}, author = {Govaert, Gérard and Nadif, Mohamed},
year = {2010}, date = {2010-01-13},
month = jan, journaltitle = {Communications in Statistics - Theory and Methods},
journal = {Communications in Statistics - Theory and Methods},
volume = {39}, volume = {39},
number = {3}, number = {3},
pages = {416--425}, pages = {416--425},
publisher = {{Taylor \& Francis}}, publisher = {{Taylor \& Francis}},
issn = {0361-0926}, issn = {0361-0926},
doi = {10.1080/03610920903140197}, doi = {10.1080/03610920903140197},
url = {https://doi.org/10.1080/03610920903140197},
urldate = {2023-06-15}, urldate = {2023-06-15},
abstract = {Although many clustering procedures aim to construct an optimal partition of objects or, sometimes, variables, there are other methods, called block clustering methods, which simultaneously consider the two sets and organize the data into homogeneous blocks. This kind of method has practical importance in a wide variety of applications such as text and market basket data analysis. Typically, the data that arise in these applications are arranged as a two-way contingency table. Using Poisson distributions, a latent block model for these data is proposed and, setting it under the maximum likelihood approach and the classification maximum likelihood approach, various algorithms are provided. Their performances are evaluated and compared to a simple use of EM or CEM applied separately on the rows and columns of the contingency table.}, abstract = {Although many clustering procedures aim to construct an optimal partition of objects or, sometimes, variables, there are other methods, called block clustering methods, which simultaneously consider the two sets and organize the data into homogeneous blocks. This kind of method has practical importance in a wide variety of applications such as text and market basket data analysis. Typically, the data that arise in these applications are arranged as a two-way contingency table. Using Poisson distributions, a latent block model for these data is proposed and, setting it under the maximum likelihood approach and the classification maximum likelihood approach, various algorithms are provided. Their performances are evaluated and compared to a simple use of EM or CEM applied separately on the rows and columns of the contingency table.},
keywords = {62H17,62H30,Block clustering,Block Poisson mixture model,CEM algorithm,Contingency table,EM algorithm}, keywords = {62H17,62H30,Block clustering,Block Poisson mixture model,CEM algorithm,Contingency table,EM algorithm},
@ -176,14 +183,15 @@
title = {Stochastic Blockmodels: {{First}} Steps}, title = {Stochastic Blockmodels: {{First}} Steps},
shorttitle = {Stochastic Blockmodels}, shorttitle = {Stochastic Blockmodels},
author = {Holland, Paul W. and Laskey, Kathryn Blackmond and Leinhardt, Samuel}, author = {Holland, Paul W. and Laskey, Kathryn Blackmond and Leinhardt, Samuel},
year = {1983}, date = {1983-06-01},
month = jun, journaltitle = {Social Networks},
journal = {Social Networks}, shortjournal = {Social Networks},
volume = {5}, volume = {5},
number = {2}, number = {2},
pages = {109--137}, pages = {109--137},
issn = {0378-8733}, issn = {0378-8733},
doi = {10.1016/0378-8733(83)90021-7}, doi = {10.1016/0378-8733(83)90021-7},
url = {https://www.sciencedirect.com/science/article/pii/0378873383900217},
urldate = {2023-06-15}, urldate = {2023-06-15},
abstract = {A stochastic model is proposed for social networks in which the actors in a network are partitioned into subgroups called blocks. The model provides a stochastic generalization of the blockmodel. Estimation techniques are developed for the special case of a single relation social network, with blocks specified a priori. An extension of the model allows for tendencies toward reciprocation of ties beyond those explained by the partition. The extended model provides a one degree-of-freedom test of the model. A numerical example from the social network literature is used to illustrate the methods.}, abstract = {A stochastic model is proposed for social networks in which the actors in a network are partitioned into subgroups called blocks. The model provides a stochastic generalization of the blockmodel. Estimation techniques are developed for the special case of a single relation social network, with blocks specified a priori. An extension of the model allows for tendencies toward reciprocation of ties beyond those explained by the partition. The extended model provides a one degree-of-freedom test of the model. A numerical example from the social network literature is used to illustrate the methods.},
langid = {english}, langid = {english},
@ -192,46 +200,45 @@
@article{kaszewska-gilasGlobalStudiesHostParasite2021, @article{kaszewska-gilasGlobalStudiesHostParasite2021,
title = {Global {{Studies}} of the {{Host-Parasite Relationships}} between {{Ectoparasitic Mites}} of the {{Family Syringophilidae}} and {{Birds}} of the {{Order Columbiformes}}}, title = {Global {{Studies}} of the {{Host-Parasite Relationships}} between {{Ectoparasitic Mites}} of the {{Family Syringophilidae}} and {{Birds}} of the {{Order Columbiformes}}},
author = {{Kaszewska-Gilas}, Katarzyna and Kosicki, Jakub Ziemowit and Hromada, Martin and Skoracki, Maciej}, author = {Kaszewska-Gilas, Katarzyna and Kosicki, Jakub Ziemowit and Hromada, Martin and Skoracki, Maciej},
year = {2021}, date = {2021-12},
month = dec, journaltitle = {Animals},
journal = {Animals},
volume = {11}, volume = {11},
number = {12}, number = {12},
pages = {3392}, pages = {3392},
publisher = {{Multidisciplinary Digital Publishing Institute}}, publisher = {{Multidisciplinary Digital Publishing Institute}},
issn = {2076-2615}, issn = {2076-2615},
doi = {10.3390/ani11123392}, doi = {10.3390/ani11123392},
url = {https://www.mdpi.com/2076-2615/11/12/3392},
urldate = {2023-06-15}, urldate = {2023-06-15},
abstract = {The quill mites belonging to the family Syringophilidae (Acari: Prostigmata: Cheyletoidea) are obligate ectoparasites of birds. They inhabit different types of the quills, where they spend their whole life cycle. In this paper, we conducted a global study of syringophilid mites associated with columbiform birds. We examined 772 pigeon and dove individuals belonging to 112 species (35\% world fauna) from all zoogeographical regions (except Madagascan) where Columbiformes occur. We measured the prevalence (IP) and the confidence interval (CI) for all infested host species. IP ranges between 4.2 and 66.7 (CI 0.2\textendash 100). We applied a bipartite analysis to determine host\textendash parasite interaction, network indices, and host specificity on species and whole network levels. The Syringophilidae\textendash Columbiformes network was composed of 25 mite species and 65 host species. The bipartite network was characterized by a high network level specialization H2{${'}$} = 0.93, high nestedness N = 0.908, connectance C = 0.90, and high modularity Q = 0.83, with 20 modules. Moreover, we reconstructed the phylogeny of the quill mites associated with columbiform birds on the generic level. Analysis shows two distinct clades: Meitingsunes + Psittaciphilus, and Peristerophila + Terratosyringophilus.}, abstract = {The quill mites belonging to the family Syringophilidae (Acari: Prostigmata: Cheyletoidea) are obligate ectoparasites of birds. They inhabit different types of the quills, where they spend their whole life cycle. In this paper, we conducted a global study of syringophilid mites associated with columbiform birds. We examined 772 pigeon and dove individuals belonging to 112 species (35\% world fauna) from all zoogeographical regions (except Madagascan) where Columbiformes occur. We measured the prevalence (IP) and the confidence interval (CI) for all infested host species. IP ranges between 4.2 and 66.7 (CI 0.2–100). 
We applied a bipartite analysis to determine host–parasite interaction, network indices, and host specificity on species and whole network levels. The Syringophilidae–Columbiformes network was composed of 25 mite species and 65 host species. The bipartite network was characterized by a high network level specialization H2′ = 0.93, high nestedness N = 0.908, connectance C = 0.90, and high modularity Q = 0.83, with 20 modules. Moreover, we reconstructed the phylogeny of the quill mites associated with columbiform birds on the generic level. Analysis shows two distinct clades: Meitingsunes + Psittaciphilus, and Peristerophila + Terratosyringophilus.},
copyright = {http://creativecommons.org/licenses/by/3.0/}, issue = {12},
langid = {english}, langid = {english},
keywords = {Acari,biodiversity,bipartite-example,network,pigeons and doves,quill mites}, keywords = {Acari,biodiversity,bipartite-example,network,pigeons and doves,quill mites},
file = {/home/polarolouis/Zotero/storage/VXVQ5CPH/Kaszewska-Gilas et al. - 2021 - Global Studies of the Host-Parasite Relationships .pdf} file = {/home/polarolouis/Zotero/storage/VXVQ5CPH/Kaszewska-Gilas et al. - 2021 - Global Studies of the Host-Parasite Relationships .pdf}
} }
@misc{larousseDefinitionsBipartiBipartite, @online{larousseDefinitionsBipartiBipartite,
title = {{D\'efinitions : biparti, bipartite - Dictionnaire de fran\c{c}ais Larousse}}, title = {Définitions : biparti, bipartite - Dictionnaire de français Larousse},
shorttitle = {{D\'efinitions}}, shorttitle = {Définitions},
author = {Larousse, {\'E}ditions}, author = {Larousse, Éditions},
url = {https://www.larousse.fr/dictionnaires/francais/biparti/9503},
urldate = {2023-06-17}, urldate = {2023-06-17},
abstract = {biparti, bipartite - D\'efinitions Fran\c{c}ais : Retrouvez la d\'efinition de biparti, bipartite, ainsi que les difficult\'es... - synonymes, homonymes, difficult\'es, citations.}, abstract = {biparti, bipartite - Définitions Français : Retrouvez la définition de biparti, bipartite, ainsi que les difficultés... - synonymes, homonymes, difficultés, citations.},
howpublished = {https://www.larousse.fr/dictionnaires/francais/biparti/9503},
langid = {french}, langid = {french},
file = {/home/polarolouis/Zotero/storage/MA2VH6NX/9503.html} file = {/home/polarolouis/Zotero/storage/MA2VH6NX/9503.html}
} }
@article{maeldoreMaelDorePollinationNetworks2020, @article{maeldoreMaelDorePollinationNetworks2020,
title = {{{MaelDore}}/{{Pollination}}\_networks: {{R}} Scripts for {{Dor\'e}} et al., 2020 - {{Relative}} Effects of Anthropogenic Pressures, Climate, and Sampling Design on the Structure of Pollination Networks at the Global Scale}, title = {{{MaelDore}}/{{Pollination}}\_networks: {{R}} Scripts for {{Doré}} et al., 2020 - {{Relative}} Effects of Anthropogenic Pressures, Climate, and Sampling Design on the Structure of Pollination Networks at the Global Scale},
shorttitle = {{{MaelDore}}/{{Pollination}}\_networks}, shorttitle = {{{MaelDore}}/{{Pollination}}\_networks},
author = {MaelDore}, author = {MaelDore},
year = {2020}, date = {2020-11-25},
month = nov,
publisher = {{Zenodo}}, publisher = {{Zenodo}},
doi = {10.5281/ZENODO.4290503}, doi = {10.5281/ZENODO.4290503},
url = {https://zenodo.org/record/4290503},
urldate = {2023-06-21}, urldate = {2023-06-21},
abstract = {R scripts for Dor\'e et al., 2020 - Relative effects of anthropogenic pressures, climate, and sampling design on the structure of pollination networks at the global scale}, abstract = {R scripts for Doré et al., 2020 - Relative effects of anthropogenic pressures, climate, and sampling design on the structure of pollination networks at the global scale},
copyright = {Open Access},
keywords = {data,plant-pollinator} keywords = {data,plant-pollinator}
} }
@ -239,32 +246,34 @@
title = {Bipartite Graphs in Systems Biology and Medicine: A Survey of Methods and Applications}, title = {Bipartite Graphs in Systems Biology and Medicine: A Survey of Methods and Applications},
shorttitle = {Bipartite Graphs in Systems Biology and Medicine}, shorttitle = {Bipartite Graphs in Systems Biology and Medicine},
author = {Pavlopoulos, Georgios A and Kontou, Panagiota I and Pavlopoulou, Athanasia and Bouyioukos, Costas and Markou, Evripides and Bagos, Pantelis G}, author = {Pavlopoulos, Georgios A and Kontou, Panagiota I and Pavlopoulou, Athanasia and Bouyioukos, Costas and Markou, Evripides and Bagos, Pantelis G},
year = {2018}, date = {2018-04-01},
month = apr, journaltitle = {GigaScience},
journal = {GigaScience}, shortjournal = {GigaScience},
volume = {7}, volume = {7},
number = {4}, number = {4},
pages = {giy014}, pages = {giy014},
issn = {2047-217X}, issn = {2047-217X},
doi = {10.1093/gigascience/giy014}, doi = {10.1093/gigascience/giy014},
url = {https://doi.org/10.1093/gigascience/giy014},
urldate = {2023-06-15}, urldate = {2023-06-15},
abstract = {The latest advances in high-throughput techniques during the past decade allowed the systems biology field to expand significantly. Today, the focus of biologists has shifted from the study of individual biological components to the study of complex biological systems and their dynamics at a larger scale. Through the discovery of novel bioentity relationships, researchers reveal new information about biological functions and processes. Graphs are widely used to represent bioentities such as proteins, genes, small molecules, ligands, and others such as nodes and their connections as edges within a network. In this review, special focus is given to the usability of bipartite graphs and their impact on the field of network biology and medicine. Furthermore, their topological properties and how these can be applied to certain biological case studies are discussed. Finally, available methodologies and software are presented, and useful insights on how bipartite graphs can shape the path toward the solution of challenging biological problems are provided.}, abstract = {The latest advances in high-throughput techniques during the past decade allowed the systems biology field to expand significantly. Today, the focus of biologists has shifted from the study of individual biological components to the study of complex biological systems and their dynamics at a larger scale. Through the discovery of novel bioentity relationships, researchers reveal new information about biological functions and processes. Graphs are widely used to represent bioentities such as proteins, genes, small molecules, ligands, and others such as nodes and their connections as edges within a network. In this review, special focus is given to the usability of bipartite graphs and their impact on the field of network biology and medicine. Furthermore, their topological properties and how these can be applied to certain biological case studies are discussed. 
Finally, available methodologies and software are presented, and useful insights on how bipartite graphs can shape the path toward the solution of challenging biological problems are provided.},
file = {/home/polarolouis/Zotero/storage/2KJFL3SB/Pavlopoulos et al. - 2018 - Bipartite graphs in systems biology and medicine .pdf;/home/polarolouis/Zotero/storage/A2Y2EGPA/pavlopoulos2018.pdf.pdf;/home/polarolouis/Zotero/storage/UK2MK5FW/pavlopoulos2018.pdf.pdf;/home/polarolouis/Zotero/storage/XP7G4PZF/4875933.html} file = {/home/polarolouis/Zotero/storage/2KJFL3SB/Pavlopoulos et al. - 2018 - Bipartite graphs in systems biology and medicine .pdf;/home/polarolouis/Zotero/storage/A2Y2EGPA/pavlopoulos2018.pdf.pdf;/home/polarolouis/Zotero/storage/UK2MK5FW/pavlopoulos2018.pdf.pdf;/home/polarolouis/Zotero/storage/XP7G4PZF/4875933.html}
} }
@article{ramos-jilibertoTopologicalChangeAndean2010, @article{ramos-jilibertoTopologicalChangeAndean2010,
title = {Topological Change of {{Andean}} Plant\textendash Pollinator Networks along an Altitudinal Gradient}, title = {Topological Change of {{Andean}} Plant–Pollinator Networks along an Altitudinal Gradient},
author = {{Ramos-Jiliberto}, Rodrigo and Dom{\'i}nguez, Daniela and Espinoza, Claudia and L{\'o}pez, Gioconda and Valdovinos, Fernanda S. and Bustamante, Ramiro O. and Medel, Rodrigo}, author = {Ramos-Jiliberto, Rodrigo and Domínguez, Daniela and Espinoza, Claudia and López, Gioconda and Valdovinos, Fernanda S. and Bustamante, Ramiro O. and Medel, Rodrigo},
year = {2010}, date = {2010-03-01},
month = mar, journaltitle = {Ecological Complexity},
journal = {Ecological Complexity}, shortjournal = {Ecological Complexity},
volume = {7}, volume = {7},
number = {1}, number = {1},
pages = {86--90}, pages = {86--90},
issn = {1476-945X}, issn = {1476-945X},
doi = {10.1016/j.ecocom.2009.06.001}, doi = {10.1016/j.ecocom.2009.06.001},
url = {https://www.sciencedirect.com/science/article/pii/S1476945X09000622},
urldate = {2023-06-15}, urldate = {2023-06-15},
abstract = {Pollination interaction networks exhibit structural regularities across a wide range of natural environments. Long-tailed degree distribution, nestedness, and modularity are the most prevalent topological patterns found in most bipartite networks analyzed up to day. In this work we evaluate the variation of these topological properties along an altitudinal gradient. To this end, we examined four plant\textendash pollinator networks from the Chilean Andes at 33\textdegree S, in range from 1800 to 3600m elevation. Our results indicate that network topology is strongly and systematically affected by elevation. At increasing altitude, the number of potential visitors per plant decreased, and species' degree distributions are closer to random expectations. On the other hand, the nested structure of mutualistic interactions systematically decreased with elevation, and network modularity was significantly higher than random expectations over the entire altitudinal range. In addition, at increasing elevations the pollination networks were organized in fewer and more strongly connected modules. Our results suggest that the severe abiotic conditions found at increased elevations translate into less organized pollination networks.}, abstract = {Pollination interaction networks exhibit structural regularities across a wide range of natural environments. Long-tailed degree distribution, nestedness, and modularity are the most prevalent topological patterns found in most bipartite networks analyzed up to day. In this work we evaluate the variation of these topological properties along an altitudinal gradient. To this end, we examined four plant–pollinator networks from the Chilean Andes at 33°S, in range from 1800 to 3600m elevation. Our results indicate that network topology is strongly and systematically affected by elevation. 
At increasing altitude, the number of potential visitors per plant decreased, and species’ degree distributions are closer to random expectations. On the other hand, the nested structure of mutualistic interactions systematically decreased with elevation, and network modularity was significantly higher than random expectations over the entire altitudinal range. In addition, at increasing elevations the pollination networks were organized in fewer and more strongly connected modules. Our results suggest that the severe abiotic conditions found at increased elevations translate into less organized pollination networks.},
langid = {english}, langid = {english},
keywords = {bipartite-example,Chile,Complexity,Degree distribution,Modularity,Mutualistic networks,Nestedness,Power law}, keywords = {bipartite-example,Chile,Complexity,Degree distribution,Modularity,Mutualistic networks,Nestedness,Power law},
file = {/home/polarolouis/Zotero/storage/ATY3ZP2X/Ramos-Jiliberto et al. - 2010 - Topological change of Andean plantpollinator netw.pdf;/home/polarolouis/Zotero/storage/HPBGUP65/ramos-jiliberto2010.pdf.pdf;/home/polarolouis/Zotero/storage/I33MZQQ7/ramos-jiliberto2010.pdf.pdf;/home/polarolouis/Zotero/storage/YJX8XBNW/S1476945X09000622.html} file = {/home/polarolouis/Zotero/storage/ATY3ZP2X/Ramos-Jiliberto et al. - 2010 - Topological change of Andean plantpollinator netw.pdf;/home/polarolouis/Zotero/storage/HPBGUP65/ramos-jiliberto2010.pdf.pdf;/home/polarolouis/Zotero/storage/I33MZQQ7/ramos-jiliberto2010.pdf.pdf;/home/polarolouis/Zotero/storage/YJX8XBNW/S1476945X09000622.html}
@ -273,14 +282,15 @@
@article{snijdersEstimationPredictionStochastic1997, @article{snijdersEstimationPredictionStochastic1997,
title = {Estimation and {{Prediction}} for {{Stochastic Blockmodels}} for {{Graphs}} with {{Latent Block Structure}}}, title = {Estimation and {{Prediction}} for {{Stochastic Blockmodels}} for {{Graphs}} with {{Latent Block Structure}}},
author = {Snijders, Tom A.B. and Nowicki, Krzysztof}, author = {Snijders, Tom A.B. and Nowicki, Krzysztof},
year = {1997}, date = {1997-01-01},
month = jan, journaltitle = {Journal of Classification},
journal = {Journal of Classification}, shortjournal = {J. of Classification},
volume = {14}, volume = {14},
number = {1}, number = {1},
pages = {75--100}, pages = {75--100},
issn = {1432-1343}, issn = {1432-1343},
doi = {10.1007/s003579900004}, doi = {10.1007/s003579900004},
url = {https://doi.org/10.1007/s003579900004},
urldate = {2023-06-15}, urldate = {2023-06-15},
abstract = {blockmodeling for graphs is proposed. The model assumes that the vertices of the graph are partitioned into two unknown blocks and that the probability of an edge between two vertices depends only on the blocks to which they belong. Statistical procedures are derived for estimating the probabilities of edges and for predicting the block structure from observations of the edge pattern only. ML estimators can be computed using the EM algorithm, but this strategy is practical only for small graphs. A Bayesian estimator, based on the Gibbs sampling, is proposed. This estimator is practical also for large graphs. When ML estimators are used, the block structure can be predicted based on predictive likelihood. When Gibbs sampling is used, the block structure can be predicted from posterior predictive probabilities. A side result is that when the number of vertices tends to infinity while the probabilities remain constant, the block structure can be recovered correctly with probability tending to 1.}, abstract = {blockmodeling for graphs is proposed. The model assumes that the vertices of the graph are partitioned into two unknown blocks and that the probability of an edge between two vertices depends only on the blocks to which they belong. Statistical procedures are derived for estimating the probabilities of edges and for predicting the block structure from observations of the edge pattern only. ML estimators can be computed using the EM algorithm, but this strategy is practical only for small graphs. A Bayesian estimator, based on the Gibbs sampling, is proposed. This estimator is practical also for large graphs. When ML estimators are used, the block structure can be predicted based on predictive likelihood. When Gibbs sampling is used, the block structure can be predicted from posterior predictive probabilities. 
A side result is that when the number of vertices tends to infinity while the probabilities remain constant, the block structure can be recovered correctly with probability tending to 1.},
langid = {english}, langid = {english},
@ -288,63 +298,66 @@
file = {/home/polarolouis/Zotero/storage/2GYRASW5/snijders1997.pdf.pdf;/home/polarolouis/Zotero/storage/JJNQV32Y/Snijders et Nowicki - 1997 - Estimation and Prediction for Stochastic Blockmode.pdf;/home/polarolouis/Zotero/storage/LXGG9SRP/snijders1997.pdf.pdf} file = {/home/polarolouis/Zotero/storage/2GYRASW5/snijders1997.pdf.pdf;/home/polarolouis/Zotero/storage/JJNQV32Y/Snijders et Nowicki - 1997 - Estimation and Prediction for Stochastic Blockmode.pdf;/home/polarolouis/Zotero/storage/LXGG9SRP/snijders1997.pdf.pdf}
} }
@misc{thebaultDatabasePlantpollinatorNetworks2020, @dataset{thebaultDatabasePlantpollinatorNetworks2020,
title = {A Database of Plant-Pollinator Networks}, title = {A Database of Plant-Pollinator Networks},
author = {Th{\'e}bault, Elisa and Fontaine, Colin}, author = {Thébault, Elisa and Fontaine, Colin},
year = {2020}, date = {2020-12-01},
month = dec,
publisher = {{Zenodo}}, publisher = {{Zenodo}},
doi = {10.5281/zenodo.4300427}, doi = {10.5281/zenodo.4300427},
url = {https://zenodo.org/record/4300427},
urldate = {2023-06-21}, urldate = {2023-06-21},
abstract = {This database assembles different published datasets of observed interaction networks between plants and pollinators, which were extracted from articles, theses and existing online databases. Each row in the data table corresponds to an interaction between a plant and a pollinator species reported at a given site by a given publication.}, abstract = {This database assembles different published datasets of observed interaction networks between plants and pollinators, which were extracted from articles, theses and existing online databases. Each row in the data table corresponds to an interaction between a plant and a pollinator species reported at a given site by a given publication.},
version = {1},
keywords = {diversity,flower visitors,mutualistic network,plant-pollinator interaction} keywords = {diversity,flower visitors,mutualistic network,plant-pollinator interaction}
} }
@misc{thebaultelisaDatabasePlantpollinatorNetworks2020, @dataset{thebaultelisaDatabasePlantpollinatorNetworks2020,
title = {A Database of Plant-Pollinator Networks}, title = {A Database of Plant-Pollinator Networks},
author = {Th{\'e}bault, Elisa and Fontaine, Colin}, author = {Thébault, Elisa and Fontaine, Colin},
year = {2020}, date = {2020-12-01},
month = dec,
publisher = {{Zenodo}}, publisher = {{Zenodo}},
doi = {10.5281/ZENODO.4300427}, doi = {10.5281/ZENODO.4300427},
url = {https://zenodo.org/record/4300427},
urldate = {2023-06-21}, urldate = {2023-06-21},
abstract = {This database assembles different published datasets of observed interaction networks between plants and pollinators, which were extracted from articles, theses and existing online databases. Each row in the data table corresponds to an interaction between a plant and a pollinator species reported at a given site by a given publication.}, abstract = {This database assembles different published datasets of observed interaction networks between plants and pollinators, which were extracted from articles, theses and existing online databases. Each row in the data table corresponds to an interaction between a plant and a pollinator species reported at a given site by a given publication.},
copyright = {Creative Commons Attribution 4.0 International, Open Access}, version = {1},
keywords = {data,diversity,flower visitors,mutualistic network,plant-pollinator,plant-pollinator interaction} keywords = {data,diversity,flower visitors,mutualistic network,plant-pollinator,plant-pollinator interaction}
} }
@misc{thebaultelisaDatabasePlantpollinatorNetworks2022, @dataset{thebaultelisaDatabasePlantpollinatorNetworks2022,
title = {A Database of Plant-Pollinator Networks}, title = {A Database of Plant-Pollinator Networks},
author = {Th{\'e}bault, Elisa and Fontaine, Colin}, author = {Thébault, Elisa and Fontaine, Colin},
year = {2022}, editora = {Doré, Maël and Parra, Santiago},
month = jun, editoratype = {collaborator},
date = {2022-06-10},
publisher = {{Zenodo}}, publisher = {{Zenodo}},
doi = {10.5281/ZENODO.6630184}, doi = {10.5281/ZENODO.6630184},
url = {https://zenodo.org/record/6630184},
urldate = {2023-06-21}, urldate = {2023-06-21},
abstract = {This database assembles different published datasets of observed interaction networks between plants and pollinators, which were extracted from articles, theses and existing online databases. Each row in the data table corresponds to an interaction between a plant and a pollinator species reported at a given site by a given publication.}, abstract = {This database assembles different published datasets of observed interaction networks between plants and pollinators, which were extracted from articles, theses and existing online databases. Each row in the data table corresponds to an interaction between a plant and a pollinator species reported at a given site by a given publication.},
collaborator = {Dor{\'e}, Ma{\"e}l and Parra, Santiago}, version = {2},
copyright = {Creative Commons Attribution 4.0 International, Open Access},
keywords = {data,diversity,flower visitors,mutualistic network,plant-pollinator,plant-pollinator interaction} keywords = {data,diversity,flower visitors,mutualistic network,plant-pollinator,plant-pollinator interaction}
} }
@misc{thebaultelisaDatabasePlantpollinatorNetworks2022a, @dataset{thebaultelisaDatabasePlantpollinatorNetworks2022a,
title = {A Database of Plant-Pollinator Networks}, title = {A Database of Plant-Pollinator Networks},
author = {Th{\'e}bault, Elisa and Fontaine, Colin}, author = {Thébault, Elisa and Fontaine, Colin},
year = {2022}, editora = {Doré, Maël and Parra, Santiago},
month = jun, editoratype = {collaborator},
date = {2022-06-10},
publisher = {{Zenodo}}, publisher = {{Zenodo}},
doi = {10.5281/ZENODO.4300426}, doi = {10.5281/ZENODO.4300426},
url = {https://zenodo.org/record/4300426},
urldate = {2023-06-21}, urldate = {2023-06-21},
abstract = {This database assembles different published datasets of observed interaction networks between plants and pollinators, which were extracted from articles, theses and existing online databases. Each row in the data table corresponds to an interaction between a plant and a pollinator species reported at a given site by a given publication.}, abstract = {This database assembles different published datasets of observed interaction networks between plants and pollinators, which were extracted from articles, theses and existing online databases. Each row in the data table corresponds to an interaction between a plant and a pollinator species reported at a given site by a given publication.},
collaborator = {Dor{\'e}, Ma{\"e}l and Parra, Santiago}, version = {2},
copyright = {Creative Commons Attribution 4.0 International, Open Access},
keywords = {data,diversity,flower visitors,mutualistic network,plant-pollinator,plant-pollinator interaction} keywords = {data,diversity,flower visitors,mutualistic network,plant-pollinator,plant-pollinator interaction}
} }
@misc{WebLifeEcological, @online{WebLifeEcological,
title = {Web of {{Life}}: Ecological Networks Database}, title = {Web of {{Life}}: Ecological Networks Database},
url = {https://www.web-of-life.es/map.php},
urldate = {2023-06-17}, urldate = {2023-06-17},
howpublished = {https://www.web-of-life.es/map.php},
keywords = {networks,site}, keywords = {networks,site},
file = {/home/polarolouis/Zotero/storage/9WZE8QLQ/map.html} file = {/home/polarolouis/Zotero/storage/9WZE8QLQ/map.html}
} }