First commit

This commit is contained in:
Louis Lacoste 2024-06-28 10:49:49 +02:00
commit 753179f341
420 changed files with 101172 additions and 0 deletions

301
.gitignore vendored Normal file
View file

@ -0,0 +1,301 @@
## Core latex/pdflatex auxiliary files:
*.aux
*.lof
*.log
*.lot
*.fls
*.out
*.toc
*.fmt
*.fot
*.cb
*.cb2
.*.lb
## Intermediate documents:
*.dvi
*.xdv
*-converted-to.*
# these rules might exclude image files for figures etc.
# *.ps
# *.eps
# *.pdf
## Generated if empty string is given at "Please type another file name for output:"
.pdf
## Bibliography auxiliary files (bibtex/biblatex/biber):
*.bbl
*.bcf
*.blg
*-blx.aux
*-blx.bib
*.run.xml
## Build tool auxiliary files:
*.fdb_latexmk
*.synctex
*.synctex(busy)
*.synctex.gz
*.synctex.gz(busy)
*.pdfsync
## Build tool directories for auxiliary files
# latexrun
latex.out/
## Auxiliary and intermediate files from other packages:
# algorithms
*.alg
*.loa
# achemso
acs-*.bib
# amsthm
*.thm
# beamer
*.nav
*.pre
*.snm
*.vrb
# changes
*.soc
# comment
*.cut
# cprotect
*.cpt
# elsarticle (documentclass of Elsevier journals)
*.spl
# endnotes
*.ent
# fixme
*.lox
# feynmf/feynmp
*.mf
*.mp
*.t[1-9]
*.t[1-9][0-9]
*.tfm
#(r)(e)ledmac/(r)(e)ledpar
*.end
*.?end
*.[1-9]
*.[1-9][0-9]
*.[1-9][0-9][0-9]
*.[1-9]R
*.[1-9][0-9]R
*.[1-9][0-9][0-9]R
*.eledsec[1-9]
*.eledsec[1-9]R
*.eledsec[1-9][0-9]
*.eledsec[1-9][0-9]R
*.eledsec[1-9][0-9][0-9]
*.eledsec[1-9][0-9][0-9]R
# glossaries
*.acn
*.acr
*.glg
*.glo
*.gls
*.glsdefs
*.lzo
*.lzs
*.slg
*.slo
*.sls
# uncomment this for glossaries-extra (will ignore makeindex's style files!)
# *.ist
# gnuplot
*.gnuplot
*.table
# gnuplottex
*-gnuplottex-*
# gregoriotex
*.gaux
*.glog
*.gtex
# htlatex
*.4ct
*.4tc
*.idv
*.lg
*.trc
*.xref
# hyperref
*.brf
# knitr
*-concordance.tex
# TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files
# *.tikz
*-tikzDictionary
# listings
*.lol
# luatexja-ruby
*.ltjruby
# makeidx
*.idx
*.ilg
*.ind
# minitoc
*.maf
*.mlf
*.mlt
*.mtc[0-9]*
*.slf[0-9]*
*.slt[0-9]*
*.stc[0-9]*
# minted
_minted*
*.pyg
# morewrites
*.mw
# newpax
*.newpax
# nomencl
*.nlg
*.nlo
*.nls
# pax
*.pax
# pdfpcnotes
*.pdfpc
# sagetex
*.sagetex.sage
*.sagetex.py
*.sagetex.scmd
# scrwfile
*.wrt
# svg
svg-inkscape/
# sympy
*.sout
*.sympy
sympy-plots-for-*.tex/
# pdfcomment
*.upa
*.upb
# pythontex
*.pytxcode
pythontex-files-*/
# tcolorbox
*.listing
# thmtools
*.loe
# TikZ & PGF
*.dpth
*.md5
*.auxlock
# titletoc
*.ptc
# todonotes
*.tdo
# vhistory
*.hst
*.ver
# easy-todo
*.lod
# xcolor
*.xcp
# xmpincl
*.xmpi
# xindy
*.xdy
# xypic precompiled matrices and outlines
*.xyc
*.xyd
# endfloat
*.ttt
*.fff
# Latexian
TSWLatexianTemp*
## Editors:
# WinEdt
*.bak
*.sav
# Texpad
.texpadtmp
# LyX
*.lyx~
# Kile
*.backup
# gummi
.*.swp
# KBibTeX
*~[0-9]*
# TeXnicCenter
*.tps
# auto folder when using emacs and auctex
./auto/*
*.el
# expex forward references with \gathertags
*-tags.tex
# standalone packages
*.sta
# Makeindex log files
*.lpz
# xwatermark package
*.xwm
# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
# option is specified. Footnotes are the stored in a file with suffix Notes.bib.
# Uncomment the next line to have this generated file ignored.
#*Notes.bib

27
.pre-commit-config.yaml Normal file
View file

@ -0,0 +1,27 @@
repos:
- repo: https://github.com/jonasbb/pre-commit-latex-hooks
rev: v1.4.0
hooks:
- id: american-eg-ie
- id: cleveref-capitalization
- id: consistent-spelling
args:
[
"--emph=et al.",
"--emph=a priori",
"--emph=a posteriori",
'--regex=naive=\bna(i|\\"i)ve',
]
#- id: csquotes
# - id: ensure-labels-for-sections
- id: no-space-in-cite
# - id: tilde-cite
# - id: unique-labels
- id: cleveref-instead-of-autoref
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.3.0
hooks:
- id: check-merge-conflict
- id: check-yaml
- id: trailing-whitespace
files: ".*\\.(?:tex|py)$"

142
.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,142 @@
{
"emeraldwalk.runonsave": {
"commands": [
{
"match": ".*\\.Rmd",
"isAsync": true,
"cmd": "/bin/bash -c \"Rscript Rmd2Latex-fragment.R '${file}'\""
},
{
"match": ".*",
"isAsync": true,
"cmd": "echo ${file} "
}
]
},
"latex-workshop.latex.tools": [
{
"name": "latexmk",
"command": "latexmk",
"args": [
"-shell-escape",
"-synctex=1",
"-interaction=nonstopmode",
"-file-line-error",
"-pdf",
"-outdir=%OUTDIR%",
"%DOC%"
],
"env": {}
},
{
"name": "lualatexmk",
"command": "latexmk",
"args": [
"-synctex=1",
"-interaction=nonstopmode",
"-file-line-error",
"-lualatex",
"-outdir=%OUTDIR%",
"%DOC%"
],
"env": {}
},
{
"name": "xelatexmk",
"command": "latexmk",
"args": [
"-synctex=1",
"-interaction=nonstopmode",
"-file-line-error",
"-xelatex",
"-outdir=%OUTDIR%",
"%DOC%"
],
"env": {}
},
{
"name": "latexmk_rconly",
"command": "latexmk",
"args": [
"%DOC%"
],
"env": {}
},
{
"name": "pdflatex",
"command": "pdflatex",
"args": [
"-synctex=1",
"-interaction=nonstopmode",
"-file-line-error",
"%DOC%"
],
"env": {}
},
{
"name": "bibtex",
"command": "bibtex",
"args": [
"%DOCFILE%"
],
"env": {}
},
{
"name": "rnw2tex",
"command": "Rscript",
"args": [
"-e",
"knitr::opts_knit$set(concordance = TRUE); knitr::knit('%DOCFILE_EXT%')"
],
"env": {}
},
{
"name": "jnw2tex",
"command": "julia",
"args": [
"-e",
"using Weave; weave(\"%DOC_EXT%\", doctype=\"tex\")"
],
"env": {}
},
{
"name": "jnw2texminted",
"command": "julia",
"args": [
"-e",
"using Weave; weave(\"%DOC_EXT%\", doctype=\"texminted\")"
],
"env": {}
},
{
"name": "pnw2tex",
"command": "pweave",
"args": [
"-f",
"tex",
"%DOC_EXT%"
],
"env": {}
},
{
"name": "pnw2texminted",
"command": "pweave",
"args": [
"-f",
"texminted",
"%DOC_EXT%"
],
"env": {}
},
{
"name": "tectonic",
"command": "tectonic",
"args": [
"--synctex",
"--keep-logs",
"%DOC%.tex"
],
"env": {}
}
]
}

5
README.md Normal file
View file

@ -0,0 +1,5 @@
# Report for my 6 months intership at MIA Paris-Saclay Lab
I summarized in this report and presentation the work I performed at MIA Paris-Saclay Lab.
The following report may contain inaccurate observations, or wrong interpretations. If you notice such errors, don't hesitate to report them to me.

View file

@ -0,0 +1,287 @@
---
output:
md_document:
citation_package: biblatex
---
\subsection{Completing raw data using CoOPLBM~\parencite{anakokDisentanglingStructureEcological2022}}
```{r, setup, include=FALSE, warning=FALSE}
knitr::opts_chunk$set(echo = FALSE, dpi = 300)
```
```{r libraries, echo = FALSE, include=FALSE}
require("colSBM")
require("aricode")
require("ggplot2")
```
```{r useful_functions, echo = FALSE}
if (getwd() == "/home/polarolouis/Nextcloud/Documents/APT/CEI/Stage Recherche Mathématiques/Depuis PC Portable/Stage MIA 2023/rapport-MIA-2023") {
path_to_add <- "Rcodes/real_data/"
} else {
path_to_add <- ""
}
extract_unlist <- function(data) {
readRDS(data) |>
extract_best_bipartite_partition() |>
unlist()
}
show_groups <- function(collection_list) {
lapply(collection_list, function(collection) {
collection$Q
})
}
partition_BICL <- function(collection_list) {
sum(sapply(collection_list, function(collection) {
collection$BICL
}))
}
extract_collection_clustering_id <- function(collection_list) {
out_dataframe <- setNames(data.frame(matrix(nrow = 0, ncol = 2)), c("netid", "collection_id"))
for (col_idx in seq_along(collection_list)) {
for (net_id in collection_list[[col_idx]]$net_id) {
out_dataframe[nrow(out_dataframe) + 1, ] <- c(net_id, col_idx)
}
}
out_dataframe
}
extract_full_models <- function(model_collections_list) {
return(do.call(
"rbind",
lapply(
seq_along(model_collections_list),
function(idx) {
model_name <- names(model_collections_list)[idx]
df <- extract_collection_clustering_id(model_collections_list[[idx]])
return(cbind(df, model = rep(model_name, length(df$netid))))
}
)
))
}
reorder_match_netids <- function(dataframe, target) {
df <- dataframe[match(target, dataframe$netid), ]
return(df)
}
extract_full_reorder <- function(model_collections_list, target) {
unordered <- extract_full_models(model_collections_list)
return(do.call("rbind", lapply(
names(model_collections_list),
function(model_name) {
return(reorder_match_netids(unordered[which(unordered$model == model_name), ], target))
}
)))
}
```
```{r data_importation, echo = FALSE}
# Uncompleted
uncompleted_model_list <- list(
"iid" = extract_unlist(paste0(path_to_add, "data/dore_uncompleted_collection_clustering_nb_run_1_iid_70_networks_08-06-23-16:31:17.Rds")),
"pi" = extract_unlist(paste0(path_to_add, "data/dore_uncompleted_collection_clustering_nb_run_1_pi_70_networks_08-06-23-16:52:16.Rds")),
"rho" = extract_unlist(paste0(path_to_add, "data/dore_uncompleted_collection_clustering_nb_run_1_rho_70_networks_08-06-23-16:49:58.Rds")),
"pirho" = extract_unlist(paste0(path_to_add, "data/dore_uncompleted_collection_clustering_nb_run_1_pirho_70_networks_08-06-23-16:41:33.Rds"))
)
# Below we will need to have the netid in the same order so we choose to use the
# uncompleted iid model order as reference
netid_order <- extract_collection_clustering_id(uncompleted_model_list$iid)$netid
model_order <- c("iid", "pi", "rho", "pirho")
uncompleted_clusterings <- extract_full_reorder(uncompleted_model_list, netid_order)
# 0.2 threshold
point_2_model_list <- list(
"iid" = extract_unlist(paste0(path_to_add, "data/dore_point_2_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-18:40:10.Rds")),
"pi" = extract_unlist(paste0(path_to_add, "data/dore_point_2_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-19:22:19.Rds")),
"rho" = extract_unlist(paste0(path_to_add, "data/dore_point_2_completed_collection_clustering_nb_run_1_rho_70_networks_07-06-23-20:03:53.Rds")),
"pirho" = extract_unlist(paste0(path_to_add, "data/dore_point_2_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-21:09:12.Rds"))
)
point_2_clusterings <- extract_full_reorder(point_2_model_list, netid_order)
# 0.5 threshold
point_5_model_list <- list(
"iid" = extract_unlist(paste0(path_to_add, "data/dore_point_5_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-19:19:53.Rds")),
"pi" = extract_unlist(paste0(path_to_add, "data/dore_point_5_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-21:31:20.Rds")),
"rho" = extract_unlist(paste0(path_to_add, "data/dore_point_5_completed_collection_clustering_nb_run_1_rho_70_networks_07-06-23-21:03:50.Rds")),
"pirho" = extract_unlist(paste0(path_to_add, "data/dore_point_5_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-21:13:10.Rds"))
)
point_5_clusterings <- extract_full_reorder(point_5_model_list, netid_order)
# Uniform re-sampled
random_model_list <- list(
"iid" = extract_unlist(paste0(path_to_add, "data/dore_random_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-21:44:14.Rds")),
"pi" = extract_unlist(paste0(path_to_add, "data/dore_random_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-22:52:47.Rds")),
"rho" = extract_unlist(paste0(path_to_add, "data/dore_random_completed_collection_clustering_nb_run_1_rho_70_networks_08-06-23-18:16:04.Rds")),
"pirho" = extract_unlist(paste0(path_to_add, "data/dore_random_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-23:07:08.Rds"))
)
random_clusterings <- extract_full_reorder(random_model_list, netid_order)
```
### Context of this analysis
After performing a netclustering on the raw data, we will see if the detect
structure resulting in the clustering comes from the sampling effort. To test
this we will use the CoOPLBM model by~\cite{anakokDisentanglingStructureEcological2022} to complete the data.
\emph{Note:}~\cite{anakokDisentanglingStructureEcological2022} provided data
for the networks for which the method was applicable, this explains that
there are fewer networks in the collections.
The CoOPLBM model assumes that the observed incidence matrix $R$ is an
element-wise product of an $M$ matrix following an LBM and an $N$ matrix which
elements follow Poisson distributions independent on $M$.
The model gives us the $\widehat{M}$ matrix, the elements of which are:
$$\widehat{M_{ij}} = \mathbb{P}(M_{ij} = 1)$$
Note that if $R_{ij} = 1$ then $\widehat{M_{ij}} = 1$
- 1 if the interaction was observed
- a probability, that there should be an interaction but it wasn't observed
This *completed matrix* can be used in different manners to be fed to the colSBM
model.
### Threshold based completions
With the thresholds, the infered incidence matrix obtained by
CoOPLBM is used to generate a completed incidence matrix by the following
procedure :
$$X_{ij} = \begin{cases}
1 & \text{if the value is over the threshold} \\
0 & \text{else} \\
\end{cases}$$
#### 0.5 completed threshold
```{r useful-functions, echo = FALSE, include=FALSE}
ARI_netclustering_models <- function(
clustering_compare,
uncompleted_clustering = uncompleted_clustering,
models = c("iid", "pi", "rho", "pirho"),
models_names = c("$iid\\text{-}colSBM$", "$\\pi\\text{-}colSBM$", "$\\rho\\text{-}colSBM$", "$\\pi\\rho\\text{-}colSBM$")) {
out <- sapply(models, function(model) {
ARI(
uncompleted_clusterings[
which(uncompleted_clusterings$model == model),
]$collection_id,
clustering_compare[
which(clustering_compare$model == model),
]$collection_id
)
})
names(out) <- models_names
out
}
```
Here, the completion threshold is set to $0.5$.
First we will compute an ARI on the collection id given by the raw data and the
completed matrix.
```{r 0.5_ARI, echo = FALSE}
knitr::kable(ARI_netclustering_models(point_5_clusterings),
col.names = c("ARI with uncompleted data"),
escape = FALSE,
booktabs = TRUE,
digits = 2,
position = "h!",
caption = "\\label{tab:ari-table-0-5-completed} Table of ARI between 0.5 completed data and uncompleted data"
)
```
In the table \ref{tab:ari-table-0-5-completed}, one can see the network clustering obtained after applying
CoOPLBM has not much in common with the clustering of the uncompleted data. Thus
we can think that the completion changed significantly the interactions in
the collections.
##### Number of sub-collections and details of each sub-collection
```{r 0.5_partition_numbers, echo = FALSE}
```
##### Supplementary information
```{r supinfo, echo = FALSE}
supinfo <- readxl::read_xlsx(paste0(path_to_add, "data/supinfo.xlsx"), sheet = 2)
interaction_data <- read.table(file = paste0(path_to_add, "data/interaction-data.txt"), sep = "\t", header = TRUE)
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
incidence_matrices <- readRDS(file = paste0(path_to_add, "data/dore-matrices.Rds"))
names_aggreg_networks <- names(incidence_matrices)
get_vector_clustering_net <- function(unlisted_model) {
vectorClusteringNet <- numeric(nrow(supinfo))
for (k in 1:length(unlisted_model)) {
idclust <- match(unlisted_model[[k]]$net_id, names_aggreg_networks)
supinfoclust <- match(seq_ids_network_aggreg[idclust], supinfo$Idweb)
vectorClusteringNet[supinfoclust] <- k
}
vectorClusteringNet
# [! vectorClusteringNet %in% 0] # Filtering the network not present in uncompleted data
}
```
```{r boxplot-function, echo = FALSE}
supinfo_boxplot <- function(supinfo_vector, parameter, pretty_name) {
return(ggplot(supinfo) +
aes(
x = factor(supinfo_vector), y = parameter,
fill = as.factor(supinfo_vector), group = as.factor(supinfo_vector)
) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
labs(
x = "Collection number", y = pretty_name,
fill = "Collection number"
))
}
```
```{r, supinfo_point_5}
supinfo_point_5_iid <- get_vector_clustering_net(point_5_model_list[["iid"]])
```
### 0.2 completed threshold
The $0.2$ threshold adds a lot of interactions compared to raw matrix.
```{r 0.2_ARI, echo = FALSE, results="asis"}
knitr::kable(ARI_netclustering_models(point_2_clusterings),
col.names = c("ARI with uncompleted data"),
escape = FALSE,
booktabs = TRUE,
digits = 2,
position = "h!",
caption = "\\label{tab:ari-table-0-2-completed} Table of ARI between 0.2 completed data and uncompleted data"
)
```
Same as for $0.5$, after applying CoOPLBM the obtained clustering doesn't match
the uncompleted data.
### Sample based completions
The $M$ matrix is used to sample a new $X$ matrix which elements are the
realisation of Bernoulli distributions of probability $M_{i,j}$.
$$\mathbb{P}(X_{i,j} = 1) = M_{i,j} $$
```{r random_ARI, echo = FALSE, results="asis"}
knitr::kable(ARI_netclustering_models(random_clusterings),
col.names = c("ARI with uncompleted data"),
escape = FALSE,
booktabs = TRUE,
digits = 2,
position = "h!",
caption = "\\label{tab:ari-table-random-completed} Table of ARI between
randomly completed data and uncompleted data"
)
```

View file

@ -0,0 +1,135 @@
\subsection{Completing raw data using CoOPLBM~\parencite{anakokDisentanglingStructureEcological2022}}
\hypertarget{context-of-this-analysis}{%
\subsubsection{Context of this
analysis}\label{context-of-this-analysis}}
After performing a netclustering on the raw data, we will see if the
detect structure resulting in the clustering comes from the sampling
effort. To test this we will use the CoOPLBM model
by\textasciitilde{}\cite{anakokDisentanglingStructureEcological2022} to
complete the data.
\emph{Note:}\textasciitilde{}\cite{anakokDisentanglingStructureEcological2022}
provided data for the networks for which the method was applicable, this
explains that there are fewer networks in the collections.
The CoOPLBM model assumes that the observed incidence matrix \(R\) is an
element-wise product of an \(M\) matrix following an LBM and an \(N\)
matrix which elements follow Poisson distributions independent on \(M\).
The model gives us the \(\widehat{M}\) matrix, the elements of which
are:
\[\widehat{M_{ij}} = \mathbb{P}(M_{ij} = 1)\]
Note that if \(R_{ij} = 1\) then \(\widehat{M_{ij}} = 1\)
\begin{itemize}
\tightlist
\item
1 if the interaction was observed
\item
a probability, that there should be an interaction but it wasn't
observed
\end{itemize}
This \emph{completed matrix} can be used in different manners to be fed
to the colSBM model.
\hypertarget{threshold-based-completions}{%
\subsubsection{Threshold based
completions}\label{threshold-based-completions}}
With the thresholds, the infered incidence matrix obtained by CoOPLBM is
used to generate a completed incidence matrix by the following procedure
: \[X_{ij} = \begin{cases}
1 & \text{if the value is over the threshold} \\
0 & \text{else} \\
\end{cases}\]
\hypertarget{completed-threshold}{%
\paragraph{0.5 completed threshold}\label{completed-threshold}}
Here, the completion threshold is set to \(0.5\).
First we will compute an ARI on the collection id given by the raw data
and the completed matrix.
\begin{table}[h!]
\caption{\label{tab:0.5_ARI}\label{tab:ari-table-0-5-completed} Table of ARI between 0.5 completed data and uncompleted data}
\centering
\begin{tabular}[t]{lr}
\toprule
& ARI with uncompleted data\\
\midrule
$iid\text{-}colSBM$ & 0.11\\
$\pi\text{-}colSBM$ & 0.03\\
$\rho\text{-}colSBM$ & 0.09\\
$\pi\rho\text{-}colSBM$ & 0.22\\
\bottomrule
\end{tabular}
\end{table}
In the table \ref{tab:ari-table-0-5-completed}, one can see the network
clustering obtained after applying CoOPLBM has not much in common with
the clustering of the uncompleted data. Thus we can think that the
completion changed significantly the interactions in the collections.
\hypertarget{number-of-sub-collections-and-details-of-each-sub-collection}{%
\subparagraph{Number of sub-collections and details of each
sub-collection}\label{number-of-sub-collections-and-details-of-each-sub-collection}}
\hypertarget{supplementary-information}{%
\subparagraph{Supplementary
information}\label{supplementary-information}}
\hypertarget{completed-threshold-1}{%
\subsubsection{0.2 completed threshold}\label{completed-threshold-1}}
The \(0.2\) threshold adds a lot of interactions compared to raw matrix.
\begin{table}[h!]
\caption{\label{tab:0.2_ARI}\label{tab:ari-table-0-2-completed} Table of ARI between 0.2 completed data and uncompleted data}
\centering
\begin{tabular}[t]{lr}
\toprule
& ARI with uncompleted data\\
\midrule
$iid\text{-}colSBM$ & 0.04\\
$\pi\text{-}colSBM$ & 0.03\\
$\rho\text{-}colSBM$ & 0.02\\
$\pi\rho\text{-}colSBM$ & 0.04\\
\bottomrule
\end{tabular}
\end{table}
Same as for \(0.5\), after applying CoOPLBM the obtained clustering
doesn't match the uncompleted data.
\hypertarget{sample-based-completions}{%
\subsubsection{Sample based
completions}\label{sample-based-completions}}
The \(M\) matrix is used to sample a new \(X\) matrix which elements are
the realisation of Bernoulli distributions of probability \(M_{i,j}\).
\[\mathbb{P}(X_{i,j} = 1) = M_{i,j} \]
\begin{table}[h!]
\caption{\label{tab:random_ARI}\label{tab:ari-table-random-completed} Table of ARI between
randomly completed data and uncompleted data}
\centering
\begin{tabular}[t]{lr}
\toprule
& ARI with uncompleted data\\
\midrule
$iid\text{-}colSBM$ & 0.01\\
$\pi\text{-}colSBM$ & 0.03\\
$\rho\text{-}colSBM$ & 0.01\\
$\pi\rho\text{-}colSBM$ & 0.02\\
\bottomrule
\end{tabular}
\end{table}

View file

@ -0,0 +1,158 @@
# Application to \cite{doreRelativeEffectsAnthropogenic2021} data
\label{sec:application-to-dorerelativeeffectsanthropogenic2021-data}
```{r, setup, include=FALSE, warning=FALSE}
knitr::opts_chunk$set(echo = FALSE, dpi = 300)
```
```{r}
# import fix
if (getwd() == "/home/polarolouis/Nextcloud/Documents/APT/CEI/Stage Recherche Mathématiques/Depuis PC Portable/Stage MIA 2023/rapport-MIA-2023") {
path_to_add <- "Rcodes/real_data/"
} else {
path_to_add <- ""
}
```
```{r require_lib, echo = FALSE, include=FALSE, warning=FALSE}
require("tidyverse")
require("knitr")
require("colSBM")
require("ggplot2")
require("patchwork")
source(paste0(path_to_add, "temporary_plot.R"))
```
```{r better_collection_extraction, echo = FALSE, warning=FALSE}
extract_unlist_reorder <- function(clustering_data_path) {
clustering <- readRDS(clustering_data_path)
if (!is.list(clustering)) {
clustering <- list(clustering)
}
best_partition <- extract_best_bipartite_partition(clustering)
best_partition_unlist <- unlist(best_partition)
if (!is.list(best_partition_unlist)) {
best_partition_unlist <- list(best_partition_unlist)
}
size <- length(best_partition_unlist)
return(setNames(lapply(best_partition_unlist, function(collection) {
reorder_parameters(collection)
}), paste(rep("Collection", size), seq_len(size))))
}
```
```{r load data, echo = FALSE, include = FALSE, warning=FALSE}
# All results
iid_unlist <- extract_unlist_reorder(paste0(path_to_add, "data/dore_collection_clustering_nb_run1_iid_123networks_24-05-23-21:40:42.Rds"))
```
Here we apply the network clustering procedure (we refer to it as *netclustering*)
to the data from \cite{doreRelativeEffectsAnthropogenic2021}. These data are
plant-pollinator bipartite networks from differents areas and times.
In a second part we will use additional information for the networks to
try to identify the impact and correlations with the observed structures.
## Netclustering with the $iid\text{-}colBiSBM$ model
We obtained the more interpretable results with $iid\text{-}colBiSBM$ model.
This resulted in `r length(iid_unlist)` collections to partition the $M = 123$
networks.
```{r meso-plots, echo = FALSE, results='asis'}
#| fig.cap=sapply(seq_along(iid_unlist), function(idx) paste0("Collection N°", idx)),
##| fig.cap = "Structure of the collections in the partition and respective proportions of blocks",
##| fig.subcap = sapply(seq_along(iid_unlist), function(idx) paste0("Collection N°", idx)),
#| fig.asp = 0.5
meso_print <- function(unlisted_partition) {
for (idx in seq_along(unlisted_partition)) {
print(plot(unlisted_partition[[idx]], type = "meso", mixture = TRUE) + ggtitle(paste("Collection ", idx)))
cat("\\newline")
}
}
meso_print(iid_unlist)
```
In all the obtained collections the structure is the classical nested structure.
As this is a well-known structure for plant-pollinator data this tends to
indicate that we are not going in a wrong direction.
The \nth{3} collection consists of only one network, indicating that for this
model, the small76 network was really different of all the others.
One reason might be that it's the oldest network and maybe the data collection
protocol is different.
## Comparison with additional information
```{r supinfo, echo = FALSE}
supinfo <- readxl::read_xlsx(paste0(path_to_add, "data/supinfo.xlsx"), sheet = 2)
interaction_data <- read.table(file = paste0(path_to_add, "data/interaction-data.txt"), sep = "\t", header = TRUE)
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
incidence_matrices <- readRDS(file = paste0(path_to_add, "data/dore-matrices.Rds"))
names_aggreg_networks <- names(incidence_matrices)
vectorClusteringNet <- numeric(nrow(supinfo))
for (k in 1:length(iid_unlist)) {
idclust <- match(iid_unlist[[k]]$net_id, names_aggreg_networks)
supinfoclust <- match(seq_ids_network_aggreg[idclust], supinfo$Idweb)
vectorClusteringNet[supinfoclust] <- k
}
```
Using supplementary information we obtain the following boxplots.
```{r boxplot-function, echo = FALSE}
supinfo_boxplot <- function(parameter, pretty_name) {
return(ggplot(supinfo) +
aes(
x = vectorClusteringNet, y = parameter,
fill = as.factor(vectorClusteringNet), group = as.factor(vectorClusteringNet)
) +
geom_boxplot() +
labs(
x = "Collection number", y = pretty_name,
fill = "Collection number"
))
}
```
```{r boxplots_annual_timespan, echo = FALSE}
#| fig.cap = "\\label{fig:boxplot-annual-time-span}Boxplot of annual time span in function of the collection number"
ggplot(supinfo) +
aes(x = vectorClusteringNet, y = Annual_time_span,
fill = as.factor(vectorClusteringNet), group = as.factor(vectorClusteringNet)) +
geom_boxplot() +
labs(
x = "Collection number", y = "Annual time span",
fill = "Collection number"
)
```
The annual time span is the number of days the sampling period lasted. So we can
thus see in figure \ref{fig:boxplot-annual-time-span} that collections 1 and 4 were
sampled for a larger period of time than collections 2 and 5.
This could explain observed differences in the structure detected : the
"checkerboard" appearance of the alpha matrices representations may represent
interactions that only occurs at a given period of time.
Thus the shorter time period doesn't capture such interactions.
```{r boxplot_rainfall, echo = FALSE}
#| fig.cap = "\\label{fig:boxplot-total-rainfall}Boxplot of total rainfall in function of the collection number"
supinfo_boxplot(supinfo$Tot_Rainfall_IPCC, "Total rainfall")
```
There seems to be the same trend for the total rainfall.
```{r boxplot_sampling_effort, echo = FALSE}
#| fig.cap = "\\label{fig:boxplot-sampling-effort}Boxplot of the sampling effort in function of the collection number"
supinfo_boxplot(supinfo$Sampling_effort, "Sampling effort")
```
The sampling effort seems to be quite higher for collection 5 and a little
higher for collection 2. And collection 1 and 4 have the inverse trend. The
separation between collections 1,4 and 2,5 seems to still hold. And the sampling
effort is related to the sampling time that is why it's higher for the
collections that were sampled for a shorter time period.

View file

@ -0,0 +1,80 @@
\hypertarget{application-to-data}{%
\section{\texorpdfstring{Application to
\cite{doreRelativeEffectsAnthropogenic2021}
data}{Application to data}}\label{application-to-data}}
\label{sec:application-to-dorerelativeeffectsanthropogenic2021-data}
Here we apply the network clustering procedure (we refer to it as
\emph{netclustering}) to the data from
\cite{doreRelativeEffectsAnthropogenic2021}. These data are
plant-pollinator bipartite networks from differents areas and times.
In a second part we will use additional information for the networks to
try to identify the impact and correlations with the observed
structures.
\hypertarget{netclustering-with-the-iidtext-colbisbm-model}{%
\subsection{\texorpdfstring{Netclustering with the
\(iid\text{-}colBiSBM\)
model}{Netclustering with the iid\textbackslash text\{-\}colBiSBM model}}\label{netclustering-with-the-iidtext-colbisbm-model}}
We obtained the more interpretable results with \(iid\text{-}colBiSBM\)
model. This resulted in 5 collections to partition the \(M = 123\)
networks.
\includegraphics{./img/22d3409f045c956ffc0773e508871c61db4ad1e9.png}\newline\includegraphics{./img/2859d1c94af6539cced6aee6ee6bf6d49498518d.png}\newline\includegraphics{./img/037bcbcbc85f8a9562f98706ad7766c4099516ef.png}\newline\includegraphics{./img/f730f05cb60a7cdc837102601660f03edd767a60.png}\newline\includegraphics{./img/90d21c2459f68c2a6bc6cce93f9f1e10c3f0fef5.png}\newline
In all the obtained collections the structure is the classical nested
structure. As this is a well-known structure for plant-pollinator data
this tends to indicate that we are not going in a wrong direction.
The \nth{3} collection consists of only one network, indicating that for
this model, the small76 network was really different of all the others.
One reason might be that it's the oldest network and maybe the data
collection protocol is different.
\hypertarget{comparison-with-additional-information}{%
\subsection{Comparison with additional
information}\label{comparison-with-additional-information}}
Using supplementary information we obtain the following boxplots.
\begin{figure}
\centering
\includegraphics{./img/de77b630fb66744d3a3ed68e45be765532d1eb0f.png}
\caption{\label{fig:boxplot-annual-time-span}Boxplot of annual time span
in function of the collection number}
\end{figure}
The annual time span is the number of days the sampling period lasted.
So we can thus see in figure \ref{fig:boxplot-annual-time-span} that
collections 1 and 4 were sampled for a larger period of time than
collections 2 and 5. This could explain observed differences in the
structure detected : the ``checkerboard'' appearance of the alpha
matrices representations may represent interactions that only occurs at
a given period of time. Thus the shorter time period doesn't capture
such interactions.
\begin{figure}
\centering
\includegraphics{./img/5bbc4b4b07c0e990a3ae2755165958ffbf517902.png}
\caption{\label{fig:boxplot-total-rainfall}Boxplot of total rainfall in
function of the collection number}
\end{figure}
There seems to be the same trend for the total rainfall.
\begin{figure}
\centering
\includegraphics{./img/c75a33aa046b6f1bbcff45268346c4ec39067917.png}
\caption{\label{fig:boxplot-sampling-effort}Boxplot of the sampling
effort in function of the collection number}
\end{figure}
The sampling effort seems to be quite higher for collection 5 and a
little higher for collection 2. And collection 1 and 4 have the inverse
trend. The separation between collections 1,4 and 2,5 seems to still
hold. And the sampling effort is related to the sampling time that is
why it's higher for the collections that were sampled for a shorter time
period.

BIN
Rcodes/real_data/data/Data.rds Executable file

Binary file not shown.

File diff suppressed because it is too large Load diff

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,162 @@
publi;web;latitude;longitude;locationtot;feedingguild1;feedingguild2;samplingyear
adaime;adaime2017_A;2.5;-51.5;terra firme forest, floodable areas and savannas, Cal<61>oene municipality, Amapa State, Brazil;fruit eater;NoOrthoptera;2013
adaime;adaime2017_B;3.5;-51.8;terra firme forest, floodable areas and savannas, Oiapoque municipality, Amapa State, Brazil;fruit eater;NoOrthoptera;2013
Aldryhim;Aldryhim1985;24;-46; Saudi Arabia, Saudi Arabia;sap sucker;NoOrthoptera;1995
basset;basset1996;-7.4;146.73;grasslands and forest patches, dominated by secondary forests, Mt Kaindi, Wau, Papua New Guinea;leaf chewing;NoOrthoptera;1993
bergamini;bergamini2016;-17.6;-43.6;grasslands, Brazilian Cerrado savannas, Brazil;flower eater;NoOrthoptera;2003
bluthgen;bluthgen2006;4.97;117.8;mature lowland evergreen dipterocarp forest , Danun Valley conservation area, Danum Valley, Sabah, Malaysia;leaf chewing;NoOrthoptera;2004
bodner;bodner2009;-3.97;-79.98;pristine montane forest, Reserva biologica San Francisco, Zamora-Chinchipe, Ecuador ;leaf chewing;NoOrthoptera;2006
Brehm;Brehm2003;-3.97;-79.08;primary disturbed montane rainforest and early successional stages after human disturbances, Estacion Cientifica San Francisco, Zamora Chinchipe, South Ecuador;mixed;NoOrthoptera;2000
Brown;Brown2019;14;99.7;lowland seasonal evergreen rainforest, Khao Chong Botanical Garden, Thailand;mixed;NoOrthoptera;2015
coley;coley2006;9;-80;moist tropical lowland forest, Barro Colorado Island, Panama;leaf chewing;NoOrthoptera;1999
cuevas-reyes;cuevas-reyes;19.49;-104.99;tropical deciduous forest and patches of tropical riparian forest, Chamela-Cuixmala Biosphere reserve, Pacific coast of Jalisco, Mexico;gall maker;NoOrthoptera;2001
dyer&gentry;dyer&gentry2002;10.43;-83.98;wetland, La Selva biological station, Costa Rica;leaf chewing;NoOrthoptera;2002
garcia&robledo;garcia&robledo2013;10.43;-83.98;aseasonal tropical wet forest, La Sevla Biological Station, Costa Rica;leaf chewing;NoOrthoptera;2011
Hackett2019;Hackett2019_HH;50.72;-1.75;ocean-adjacent peninsula consisting of a mosaic of habitats, Hengistbury Head, UK;mixed;NoOrthoptera;2013
Hackett2019;Hackett2019_TP;-46.59;169.43;ocean-adjacent peninsula consisting of a mosaic of habitats, Tautuku Peninsula, New Zealand;mixed;NoOrthoptera;2015
heleno;heleno2009;37.78;-25.22;native vegetation was cleared for pastures, replaced by production forest or taken over by weeds, Serra da Tronqueira, Sao Miguel, Azores archipelago, Portugal;mixed;NoOrthoptera;2006
hemidi;hemidi2013;34.8;5.73; Biskra, Alg<6C>rie;sap sucker;NoOrthoptera;2011
henneman&memmott;henneman&memmott1;22.09;-159.56;isolated from agricultural areas, Alakai Swamp, island of Kauai, Hawaii;leaf chewing;NoOrthoptera;2000
henneman&memmott;henneman&memmott2;22.09;-159.56;wetland, Alakai Swamp, island of Kauai, Hawaii;leaf chewing;NoOrthoptera;2000
Ibanez;Ibanez2013;45.03;6.4;subalpine grasslands, Vilar d'Ar<41>ne, central french alps;leaf chewing;Orthoptera;2011
Idechil;Idechil2007;7.5;134.5; Republic of Palau, islands of Palau;sap sucker;NoOrthoptera;2004
janzen;janzen1980;10.8;-85.7;deciduous forest with moister habitats , Santa Rosa National Park, Costa Rica;frugivore_seed predator;NoOrthoptera;1980
janzen;janzen2003;10.83;-85.6;rainforest, Area de conservation Guanacaste, Costa Rica;leaf chewing;NoOrthoptera;2003
joern;joern_altuda;30.4;-103.6;mountainous area with arid grassland plant species, Altuda and Marathon, near Alpine, Texas;leaf chewing;Orthoptera;1975
joern;joern_marathon;30.4;-103.6;arid grassland, Alpine, Texas, USA;leaf chewing;Orthoptera;1975
Joern1985;Joern1985;41.578544;-101.707547;Arapaho Prairie, upland Sand Hills grassland, Arthur County, Nebraska, USA;leaf chewing;Orthoptera;1985
kim&choi;kim&choi2021_A;35.41;127.48;temperate forest, mixed deciduous forest, Sangseonam hornbeam tree forest, South Korea;leaf chewing;NoOrthoptera;2016
kim&choi;kim&choi2021_B;35.37;127.57;temperate forest, mixed deciduous forest, Mt Jirisan, banseon oak tree forest, South Korea;leaf chewing;NoOrthoptera;2016
Knuff;Knuff2019;47.998955;8.205064;managed forests, southern Black Forest, southwestern Germany;gall maker;NoOrthoptera;2017
kollars;kollars2013;48.32;18.1;alluvial soil with high content of clay fraction, Botanical garden of Slovak University of Agriculture, Nitra, Solavakia;mixed;NoOrthoptera;2012
Konig2022;Konig2022;47.592457;12.940233;open grassland sites on calcareous bedrock, National Park Berchtesgaden and Lower Franconia, Bavaria, Germany;leaf chewing;Orthoptera;2019
lewis;lewis2002;17.07;-88.69;moist tropical forest, Chiquibul Forest reserve, Cayo district, Belize, Central America;leaf miner;NoOrthoptera;1998
Macfadyen;Macfadyen_A1;51.4;-2.69;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_A10;51;-2.49;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_A2;51.55;-2.49;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_A3;51.51;-2.42;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_A4;51.52;-2.49;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_A5;51.51;-2.5;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_A6;51.52;-2.48;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_A7;51.32;-2.35;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_A8;51.36;-2.47;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_A9;51.32;-2.34;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_B1;5.421563;-2.6801987;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_B2;51.291623;-2.6028854;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_B3;51.29728;-2.4066504;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_B4;51.126799;-2.3271997;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_B5;51.069394;-2.4322512;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_B6;51.614902;-2.3436984;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_B7;51.694362;-2.1084028;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_B8;51.638776;-2.1769368;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_B9;51.685258;-2.0148078;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
Macfadyen;Macfadyen_B10;51.142195;-2.2890993;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
martins;martins2020;-17.6;-43.6;grasslands, Brazilian Cerrado , Brazil;flower eater;NoOrthoptera;2005
Masetti;Masetti2004_A;44.16;12.22;near a 5 yr hedgerow, potato, wheat, alfalfa and wine grapes, Centro Ricerche produzioni Vegetali, Bologna Province, Italia;leaf miner;NoOrthoptera;1999
Masetti;Masetti2004_B;45;12;near a 4 yr hedgerow, corn, wheat and beets, Bologna Province, Italia, Bologna Province, Italia;leaf miner;NoOrthoptera;1999
Masetti;Masetti2004_C;45;12;near a 6 yr hederow and a ditch, Bologna Province, Italia, Bologna Province, Italia;leaf miner;NoOrthoptera;1999
massa;massa2001_A;37.6;14; Sicily, Italy;leaf miner;NoOrthoptera;2000
massa;massa2001_B;30;35; Al Bahhath, Aqaba and Dana Village, Jordan;leaf miner;NoOrthoptera;1999
memmott;memmott1994;10.83;-85.7;tropical dry forest, Santa Rosa National Park, Guanacaste, Costa Rica;leaf miner;NoOrthoptera;1990
muller;muller1999;51.4;-0.64;heavily grazed field, Silwood Park, Berkshire, England;sap sucker;NoOrthoptera;1995
nakagawa;nakagawa_1;4.33;113.83;humult and udult soils, Lambir Hills National Park, Sarawak, Malaysia;seed predator;NoOrthoptera;1998
nakagawa;nakagawa_2;4.33;113.83;tropical lowland forest, Lambir Hills National Park, Malaysia;seed predator;NoOrthoptera;1998
novotny1;novotny2005;-5.23;145.4;primary and secondary forests, Madang province, Papua New Guinea;fruit eater;NoOrthoptera;2001
novotny2;novotny2012;-5.23;145.4;tropical lowland forest, Madang province, Papua New Guinea;mixed;NoOrthoptera;2008
pearson&altermatt;pearson&altermatt2013;48.54;9.04; Baden W<>rttemberg, Germany;leaf chewing;NoOrthoptera;2005
Peralta;Peralta2017;-41.49228;173.022973;native Nothofagaceae forest and exotic pine plantations, native and exotic plantation forests, New Zealand;leaf chewing;NoOrthoptera;2011
Pitteloud;Pitteloud2020_B1;46.25788;7.02235;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_B2;46.26914;7.03241;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_B3;46.27424;7.04833;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_B4;46.28629;7.0957;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_B5.1;46.28567;7.12451;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_B5.2;46.29149;7.11251;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_B6;46.26865;7.10755;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_B7;46.27683;7.15518;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_B8;46.26852;7.16232;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_C1;46.86999;9.5166;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_C2;46.87022;9.50919;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_C3;46.8743;9.5083;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_C4;46.87021;9.48978;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_C5;46.87841;9.49396;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_C6;46.88867;9.48989;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_C7;46.88946;9.48001;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_C8;46.89054;9.47624;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_F1;46.4732;8.81948;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_F2;46.4887;8.79271;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_F3;46.4951;8.78085;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_F4;46.5028;8.7816;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_F5;46.50577;8.7947;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_F6;46.50891;8.77677;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_F7;46.51244;8.77881;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_F8;46.51553;8.78595;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_G1;46.63493;7.90025;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_G2;46.63957;7.9796;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_G3;46.64264;7.97271;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_G4;46.64737;7.97086;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_G5;46.64949;7.9509;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_G6;46.65319;7.94267;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_G7;46.65756;7.99056;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_G8;46.66035;7.9805;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_M1;46.12583;7.08096;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_M2;46.06652;7.14868;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_M3;46.05299;7.15534;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_M4;46.01957;7.1621;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_M5;46.02743;7.17717;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_M6;46.03024;7.17648;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_M7;46.03573;7.17791;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_M8;46.03712;7.18249;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_S1;46.31328;7.58466;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_S2;46.32012;7.57929;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_S3;46.3251;7.5532;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_S4;46.33103;7.56003;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_S5;46.3353;7.53348;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_S6;46.34787;7.5386;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_S7;46.34687;7.52317;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
Pitteloud;Pitteloud2020_S8;46.35071;7.52571;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
pocock;pocock2012_aphid;51.31;-2.32;mixed farm: 50% arable, 50% grass pasture or ley, Norwood Farm, Somerset, UK;mixed;NoOrthoptera;2008
prado;prado2004;-19;-43;high plant diversity and endemism, Campos rupestres, Brazil;flower eater;NoOrthoptera;1996
Saavedra;Saavedra2017_initial_A;19.575694;-105.035162;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
Saavedra;Saavedra2017_initial_B;19.572095;-105.054989;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
Saavedra;Saavedra2017_initial_C;19.416939;-104.895751;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
Saavedra;Saavedra2017_middle_A;19.610383;-105.027566;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
Saavedra;Saavedra2017_middle_B;19.578039;-105.040484;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
Saavedra;Saavedra2017_middle_C;19.509043;-104.9256;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
Saavedra;Saavedra2017_late_A;19.603551;-105.090781;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
Saavedra;Saavedra2017_late_B;19.596921;-105.033532;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
Saavedra;Saavedra2017_late_C;19.498545;-104.931694;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
Sanjaya2016;Sanjaya2016;-6.862444;107.595211;Botanical Garden UPI, Bandung, Indonesia;fruit eater;NoOrthoptera;2012
Santos;Santos2006;-29.8;-51.77; Taquari, Rio Grande do Sul, Bazil;leaf miner;NoOrthoptera;2004
SantosdeAraujo2019;SantosdeAraujo2019;48.318611;18.081944;Nitra City Park, Nitra, Slovakia;gall maker;NoOrthoptera;2008
seifert;seifert2020_A;42.71;141.6;temperate lowland forest, Tomakomai, Hokkaido, Japan;leaf chewing;NoOrthoptera;2015
seifert;seifert2020_B;38.9;-78.41;temperate lowland forest, Toms Brook, Virginia, USA;leaf chewing;NoOrthoptera;2017
seifert;seifert2020_C;48.7;16.95;temperate lowland forest, Lanzhot , Czech Republic;leaf chewing;NoOrthoptera;2015
shimada;shimada2020;35.039324;135.187705;paddy fields and grasslands, agricultural landscape, Japan;sap sucker;NoOrthoptera;2019
Shinohara;Shinohara2019_Abandoned_1;35.495752;135.893636;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
Shinohara;Shinohara2019_Abandoned_2;35.507486;135.909609;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
Shinohara;Shinohara2019_Abandoned_3;35.489386;135.897067;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
Shinohara;Shinohara2019_Abandoned_4;35.565453;135.908289;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
Shinohara;Shinohara2019_Extensively_managed_1;35.498043;135.895563;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
Shinohara;Shinohara2019_Extensively_managed_2;35.50826;135.908896;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
Shinohara;Shinohara2019_Extensively_managed_3;35.489949;135.89785;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
Shinohara;Shinohara2019_Extensively_managed_4;35.561526;135.905618;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
Shinohara;Shinohara2019_Intensively_managed_1;35.498448;135.898817;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
Shinohara;Shinohara2019_Intensively_managed_2;35.509203;135.907169;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
Shinohara;Shinohara2019_Intensively_managed_3;35.489111;135.89987;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
Shinohara;Shinohara2019_Intensively_managed_4;35.563264;135.907279;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
Silveira2021;Silveira2021_nonpreserved;-16.788637;-43.880394;Neotropical savanna areas in anthropized landscapes, northern Minas Gerais State, Brazil;mixed;mixed;2019
Silveira2021;Silveira2021_preserved;-17.358135;-44.259766;Neotropical savanna areas in preserved landscapes, northern Minas Gerais State, Brazil;mixed;mixed;2019
stary;stary2008;49.51;14.93;managed forests, Techobuz, Czech;sap sucker;NoOrthoptera;2007
sugiura;sugiura2020;26.67;142.98;many endemic organisms: vascular plants and insects, Ogasawara Village, Tokyo metropolitan, Japan;mixed;NoOrthoptera;2008
Sugiura2010;Sugiura2010;27.073072;142.217277;subtropical Ogasawara islands, Japan;mixed;NoOrthoptera;2008
Szpeiner2008;Szpeiner2008;-31.403573;-64.144999;ornemental plants, Cordoba city, Argentina;sap sucker;NoOrthoptera;2002
Tahar2015;Tahar2015;34.834667;5.738862;Biskra province, Algeria;sap sucker;NoOrthoptera;2014
tavakilian;tavakilian1997;5.41;-52.97;swamp forests to mixed forests on well-drained soil, Sinnamary River Basin, French Guiana;deadwood eater;NoOrthoptera;1993
tcharntke;tcharntke;49;8.32; Karlsruhe, southwest Germany;leaf miner;NoOrthoptera;1989
Todorov2014;Todorov2014;42.157088;24.82175;Experimentally Field of Agriculture University, Agro-ecosystems, Plovdiv Region, Bulgaria;sap sucker;NoOrthoptera;2006
ueckert;ueckert;40.17;-103.21;mixed-grass prairie, 27 km north of Akron, Washington county, Colorado;leaf chewing;Orthoptera;1968
Volf2017;Volf2017_Tomakomai;42.716667;141.6;Tomakomai forest, Japan;mixed;NoOrthoptera;2014
Volf2017;Volf2017_Lanzhot;48.8;17.083333;Lanzhot forest, Czech Republic;mixed;NoOrthoptera;2014
Volf2017;Volf2017_Mikulcice;48.683333;16.933333;Mikulcice forest, Czech Republic;mixed;NoOrthoptera;2014
Xi2020;Xi2020;32.763004;102.518629;alpine meadow in the eastern Qinghai-Tibetan Plateau, Hongyuan County, Sichuan Province, China;seed predator;NoOrthoptera;2016
Zhu;Zhu2018;23.45;111.883333;subtropical forest, subtropical forest located within Heishiding Nature Reserve, south China, China;leaf chewing;NoOrthoptera;2014
1 publi web latitude longitude locationtot feedingguild1 feedingguild2 samplingyear
2 adaime adaime2017_A 2.5 -51.5 terra firme forest, floodable areas and savannas, Cal�oene municipality, Amapa State, Brazil fruit eater NoOrthoptera 2013
3 adaime adaime2017_B 3.5 -51.8 terra firme forest, floodable areas and savannas, Oiapoque municipality, Amapa State, Brazil fruit eater NoOrthoptera 2013
4 Aldryhim Aldryhim1985 24 -46 Saudi Arabia, Saudi Arabia sap sucker NoOrthoptera 1995
5 basset basset1996 -7.4 146.73 grasslands and forest patches, dominated by secondary forests, Mt Kaindi, Wau, Papua New Guinea leaf chewing NoOrthoptera 1993
6 bergamini bergamini2016 -17.6 -43.6 grasslands, Brazilian Cerrado savannas, Brazil flower eater NoOrthoptera 2003
7 bluthgen bluthgen2006 4.97 117.8 mature lowland evergreen dipterocarp forest , Danun Valley conservation area, Danum Valley, Sabah, Malaysia leaf chewing NoOrthoptera 2004
8 bodner bodner2009 -3.97 -79.98 pristine montane forest, Reserva biologica San Francisco, Zamora-Chinchipe, Ecuador leaf chewing NoOrthoptera 2006
9 Brehm Brehm2003 -3.97 -79.08 primary disturbed montane rainforest and early successional stages after human disturbances, Estacion Cientifica San Francisco, Zamora Chinchipe, South Ecuador mixed NoOrthoptera 2000
10 Brown Brown2019 14 99.7 lowland seasonal evergreen rainforest, Khao Chong Botanical Garden, Thailand mixed NoOrthoptera 2015
11 coley coley2006 9 -80 moist tropical lowland forest, Barro Colorado Island, Panama leaf chewing NoOrthoptera 1999
12 cuevas-reyes cuevas-reyes 19.49 -104.99 tropical deciduous forest and patches of tropical riparian forest, Chamela-Cuixmala Biosphere reserve, Pacific coast of Jalisco, Mexico gall maker NoOrthoptera 2001
13 dyer&gentry dyer&gentry2002 10.43 -83.98 wetland, La Selva biological station, Costa Rica leaf chewing NoOrthoptera 2002
14 garcia&robledo garcia&robledo2013 10.43 -83.98 aseasonal tropical wet forest, La Sevla Biological Station, Costa Rica leaf chewing NoOrthoptera 2011
15 Hackett2019 Hackett2019_HH 50.72 -1.75 ocean-adjacent peninsula consisting of a mosaic of habitats, Hengistbury Head, UK mixed NoOrthoptera 2013
16 Hackett2019 Hackett2019_TP -46.59 169.43 ocean-adjacent peninsula consisting of a mosaic of habitats, Tautuku Peninsula, New Zealand mixed NoOrthoptera 2015
17 heleno heleno2009 37.78 -25.22 native vegetation was cleared for pastures, replaced by production forest or taken over by weeds, Serra da Tronqueira, Sao Miguel, Azores archipelago, Portugal mixed NoOrthoptera 2006
18 hemidi hemidi2013 34.8 5.73 Biskra, Alg�rie sap sucker NoOrthoptera 2011
19 henneman&memmott henneman&memmott1 22.09 -159.56 isolated from agricultural areas, Alakai Swamp, island of Kauai, Hawaii leaf chewing NoOrthoptera 2000
20 henneman&memmott henneman&memmott2 22.09 -159.56 wetland, Alakai Swamp, island of Kauai, Hawaii leaf chewing NoOrthoptera 2000
21 Ibanez Ibanez2013 45.03 6.4 subalpine grasslands, Vilar d'Ar�ne, central french alps leaf chewing Orthoptera 2011
22 Idechil Idechil2007 7.5 134.5 Republic of Palau, islands of Palau sap sucker NoOrthoptera 2004
23 janzen janzen1980 10.8 -85.7 deciduous forest with moister habitats , Santa Rosa National Park, Costa Rica frugivore_seed predator NoOrthoptera 1980
24 janzen janzen2003 10.83 -85.6 rainforest, Area de conservation Guanacaste, Costa Rica leaf chewing NoOrthoptera 2003
25 joern joern_altuda 30.4 -103.6 mountainous area with arid grassland plant species, Altuda and Marathon, near Alpine, Texas leaf chewing Orthoptera 1975
26 joern joern_marathon 30.4 -103.6 arid grassland, Alpine, Texas, USA leaf chewing Orthoptera 1975
27 Joern1985 Joern1985 41.578544 -101.707547 Arapaho Prairie, upland Sand Hills grassland, Arthur County, Nebraska, USA leaf chewing Orthoptera 1985
28 kim&choi kim&choi2021_A 35.41 127.48 temperate forest, mixed deciduous forest, Sangseonam hornbeam tree forest, South Korea leaf chewing NoOrthoptera 2016
29 kim&choi kim&choi2021_B 35.37 127.57 temperate forest, mixed deciduous forest, Mt Jirisan, banseon oak tree forest, South Korea leaf chewing NoOrthoptera 2016
30 Knuff Knuff2019 47.998955 8.205064 managed forests, southern Black Forest, southwestern Germany gall maker NoOrthoptera 2017
31 kollars kollars2013 48.32 18.1 alluvial soil with high content of clay fraction, Botanical garden of Slovak University of Agriculture, Nitra, Solavakia mixed NoOrthoptera 2012
32 Konig2022 Konig2022 47.592457 12.940233 open grassland sites on calcareous bedrock, National Park Berchtesgaden and Lower Franconia, Bavaria, Germany leaf chewing Orthoptera 2019
33 lewis lewis2002 17.07 -88.69 moist tropical forest, Chiquibul Forest reserve, Cayo district, Belize, Central America leaf miner NoOrthoptera 1998
34 Macfadyen Macfadyen_A1 51.4 -2.69 temperate grassland, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
35 Macfadyen Macfadyen_A10 51 -2.49 temperate grassland, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
36 Macfadyen Macfadyen_A2 51.55 -2.49 temperate grassland, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
37 Macfadyen Macfadyen_A3 51.51 -2.42 temperate grassland, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
38 Macfadyen Macfadyen_A4 51.52 -2.49 temperate grassland, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
39 Macfadyen Macfadyen_A5 51.51 -2.5 temperate grassland, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
40 Macfadyen Macfadyen_A6 51.52 -2.48 temperate grassland, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
41 Macfadyen Macfadyen_A7 51.32 -2.35 temperate grassland, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
42 Macfadyen Macfadyen_A8 51.36 -2.47 temperate grassland, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
43 Macfadyen Macfadyen_A9 51.32 -2.34 temperate grassland, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
44 Macfadyen Macfadyen_B1 5.421563 -2.6801987 temperate grassland, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
45 Macfadyen Macfadyen_B2 51.291623 -2.6028854 pasture fields, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
46 Macfadyen Macfadyen_B3 51.29728 -2.4066504 pasture fields, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
47 Macfadyen Macfadyen_B4 51.126799 -2.3271997 pasture fields, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
48 Macfadyen Macfadyen_B5 51.069394 -2.4322512 pasture fields, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
49 Macfadyen Macfadyen_B6 51.614902 -2.3436984 pasture fields, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
50 Macfadyen Macfadyen_B7 51.694362 -2.1084028 pasture fields, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
51 Macfadyen Macfadyen_B8 51.638776 -2.1769368 pasture fields, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
52 Macfadyen Macfadyen_B9 51.685258 -2.0148078 pasture fields, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
53 Macfadyen Macfadyen_B10 51.142195 -2.2890993 pasture fields, 10 organic farms in UK, south west England mixed NoOrthoptera 2006
54 martins martins2020 -17.6 -43.6 grasslands, Brazilian Cerrado , Brazil flower eater NoOrthoptera 2005
55 Masetti Masetti2004_A 44.16 12.22 near a 5 yr hedgerow, potato, wheat, alfalfa and wine grapes, Centro Ricerche produzioni Vegetali, Bologna Province, Italia leaf miner NoOrthoptera 1999
56 Masetti Masetti2004_B 45 12 near a 4 yr hedgerow, corn, wheat and beets, Bologna Province, Italia, Bologna Province, Italia leaf miner NoOrthoptera 1999
57 Masetti Masetti2004_C 45 12 near a 6 yr hederow and a ditch, Bologna Province, Italia, Bologna Province, Italia leaf miner NoOrthoptera 1999
58 massa massa2001_A 37.6 14 Sicily, Italy leaf miner NoOrthoptera 2000
59 massa massa2001_B 30 35 Al Bahhath, Aqaba and Dana Village, Jordan leaf miner NoOrthoptera 1999
60 memmott memmott1994 10.83 -85.7 tropical dry forest, Santa Rosa National Park, Guanacaste, Costa Rica leaf miner NoOrthoptera 1990
61 muller muller1999 51.4 -0.64 heavily grazed field, Silwood Park, Berkshire, England sap sucker NoOrthoptera 1995
62 nakagawa nakagawa_1 4.33 113.83 humult and udult soils, Lambir Hills National Park, Sarawak, Malaysia seed predator NoOrthoptera 1998
63 nakagawa nakagawa_2 4.33 113.83 tropical lowland forest, Lambir Hills National Park, Malaysia seed predator NoOrthoptera 1998
64 novotny1 novotny2005 -5.23 145.4 primary and secondary forests, Madang province, Papua New Guinea fruit eater NoOrthoptera 2001
65 novotny2 novotny2012 -5.23 145.4 tropical lowland forest, Madang province, Papua New Guinea mixed NoOrthoptera 2008
66 pearson&altermatt pearson&altermatt2013 48.54 9.04 Baden W�rttemberg, Germany leaf chewing NoOrthoptera 2005
67 Peralta Peralta2017 -41.49228 173.022973 native Nothofagaceae forest and exotic pine plantations, native and exotic plantation forests, New Zealand leaf chewing NoOrthoptera 2011
68 Pitteloud Pitteloud2020_B1 46.25788 7.02235 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
69 Pitteloud Pitteloud2020_B2 46.26914 7.03241 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
70 Pitteloud Pitteloud2020_B3 46.27424 7.04833 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
71 Pitteloud Pitteloud2020_B4 46.28629 7.0957 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
72 Pitteloud Pitteloud2020_B5.1 46.28567 7.12451 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
73 Pitteloud Pitteloud2020_B5.2 46.29149 7.11251 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
74 Pitteloud Pitteloud2020_B6 46.26865 7.10755 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
75 Pitteloud Pitteloud2020_B7 46.27683 7.15518 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
76 Pitteloud Pitteloud2020_B8 46.26852 7.16232 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
77 Pitteloud Pitteloud2020_C1 46.86999 9.5166 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
78 Pitteloud Pitteloud2020_C2 46.87022 9.50919 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
79 Pitteloud Pitteloud2020_C3 46.8743 9.5083 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
80 Pitteloud Pitteloud2020_C4 46.87021 9.48978 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
81 Pitteloud Pitteloud2020_C5 46.87841 9.49396 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
82 Pitteloud Pitteloud2020_C6 46.88867 9.48989 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
83 Pitteloud Pitteloud2020_C7 46.88946 9.48001 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
84 Pitteloud Pitteloud2020_C8 46.89054 9.47624 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
85 Pitteloud Pitteloud2020_F1 46.4732 8.81948 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
86 Pitteloud Pitteloud2020_F2 46.4887 8.79271 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
87 Pitteloud Pitteloud2020_F3 46.4951 8.78085 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
88 Pitteloud Pitteloud2020_F4 46.5028 8.7816 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
89 Pitteloud Pitteloud2020_F5 46.50577 8.7947 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
90 Pitteloud Pitteloud2020_F6 46.50891 8.77677 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
91 Pitteloud Pitteloud2020_F7 46.51244 8.77881 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
92 Pitteloud Pitteloud2020_F8 46.51553 8.78595 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
93 Pitteloud Pitteloud2020_G1 46.63493 7.90025 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
94 Pitteloud Pitteloud2020_G2 46.63957 7.9796 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
95 Pitteloud Pitteloud2020_G3 46.64264 7.97271 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
96 Pitteloud Pitteloud2020_G4 46.64737 7.97086 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
97 Pitteloud Pitteloud2020_G5 46.64949 7.9509 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
98 Pitteloud Pitteloud2020_G6 46.65319 7.94267 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
99 Pitteloud Pitteloud2020_G7 46.65756 7.99056 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
100 Pitteloud Pitteloud2020_G8 46.66035 7.9805 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
101 Pitteloud Pitteloud2020_M1 46.12583 7.08096 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
102 Pitteloud Pitteloud2020_M2 46.06652 7.14868 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
103 Pitteloud Pitteloud2020_M3 46.05299 7.15534 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
104 Pitteloud Pitteloud2020_M4 46.01957 7.1621 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
105 Pitteloud Pitteloud2020_M5 46.02743 7.17717 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
106 Pitteloud Pitteloud2020_M6 46.03024 7.17648 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
107 Pitteloud Pitteloud2020_M7 46.03573 7.17791 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
108 Pitteloud Pitteloud2020_M8 46.03712 7.18249 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
109 Pitteloud Pitteloud2020_S1 46.31328 7.58466 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
110 Pitteloud Pitteloud2020_S2 46.32012 7.57929 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
111 Pitteloud Pitteloud2020_S3 46.3251 7.5532 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
112 Pitteloud Pitteloud2020_S4 46.33103 7.56003 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
113 Pitteloud Pitteloud2020_S5 46.3353 7.53348 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
114 Pitteloud Pitteloud2020_S6 46.34787 7.5386 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
115 Pitteloud Pitteloud2020_S7 46.34687 7.52317 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
116 Pitteloud Pitteloud2020_S8 46.35071 7.52571 alpine open grasslands, open grasslands in Swiss Alps, Switzerland leaf chewing Orthoptera 2016
117 pocock pocock2012_aphid 51.31 -2.32 mixed farm: 50% arable, 50% grass pasture or ley, Norwood Farm, Somerset, UK mixed NoOrthoptera 2008
118 prado prado2004 -19 -43 high plant diversity and endemism, Campos rupestres, Brazil flower eater NoOrthoptera 1996
119 Saavedra Saavedra2017_initial_A 19.575694 -105.035162 tropical dry forest succession, Chamela�Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico leaf chewing NoOrthoptera 2010
120 Saavedra Saavedra2017_initial_B 19.572095 -105.054989 tropical dry forest succession, Chamela�Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico leaf chewing NoOrthoptera 2010
121 Saavedra Saavedra2017_initial_C 19.416939 -104.895751 tropical dry forest succession, Chamela�Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico leaf chewing NoOrthoptera 2010
122 Saavedra Saavedra2017_middle_A 19.610383 -105.027566 tropical dry forest succession, Chamela�Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico leaf chewing NoOrthoptera 2010
123 Saavedra Saavedra2017_middle_B 19.578039 -105.040484 tropical dry forest succession, Chamela�Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico leaf chewing NoOrthoptera 2010
124 Saavedra Saavedra2017_middle_C 19.509043 -104.9256 tropical dry forest succession, Chamela�Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico leaf chewing NoOrthoptera 2010
125 Saavedra Saavedra2017_late_A 19.603551 -105.090781 tropical dry forest succession, Chamela�Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico leaf chewing NoOrthoptera 2010
126 Saavedra Saavedra2017_late_B 19.596921 -105.033532 tropical dry forest succession, Chamela�Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico leaf chewing NoOrthoptera 2010
127 Saavedra Saavedra2017_late_C 19.498545 -104.931694 tropical dry forest succession, Chamela�Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico leaf chewing NoOrthoptera 2010
128 Sanjaya2016 Sanjaya2016 -6.862444 107.595211 Botanical Garden UPI, Bandung, Indonesia fruit eater NoOrthoptera 2012
129 Santos Santos2006 -29.8 -51.77 Taquari, Rio Grande do Sul, Bazil leaf miner NoOrthoptera 2004
130 SantosdeAraujo2019 SantosdeAraujo2019 48.318611 18.081944 Nitra City Park, Nitra, Slovakia gall maker NoOrthoptera 2008
131 seifert seifert2020_A 42.71 141.6 temperate lowland forest, Tomakomai, Hokkaido, Japan leaf chewing NoOrthoptera 2015
132 seifert seifert2020_B 38.9 -78.41 temperate lowland forest, Toms Brook, Virginia, USA leaf chewing NoOrthoptera 2017
133 seifert seifert2020_C 48.7 16.95 temperate lowland forest, Lanzhot , Czech Republic leaf chewing NoOrthoptera 2015
134 shimada shimada2020 35.039324 135.187705 paddy fields and grasslands, agricultural landscape, Japan sap sucker NoOrthoptera 2019
135 Shinohara Shinohara2019_Abandoned_1 35.495752 135.893636 seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan mixed NoOrthoptera 2016
136 Shinohara Shinohara2019_Abandoned_2 35.507486 135.909609 seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan mixed NoOrthoptera 2016
137 Shinohara Shinohara2019_Abandoned_3 35.489386 135.897067 seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan mixed NoOrthoptera 2016
138 Shinohara Shinohara2019_Abandoned_4 35.565453 135.908289 seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan mixed NoOrthoptera 2016
139 Shinohara Shinohara2019_Extensively_managed_1 35.498043 135.895563 seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan mixed NoOrthoptera 2016
140 Shinohara Shinohara2019_Extensively_managed_2 35.50826 135.908896 seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan mixed NoOrthoptera 2016
141 Shinohara Shinohara2019_Extensively_managed_3 35.489949 135.89785 seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan mixed NoOrthoptera 2016
142 Shinohara Shinohara2019_Extensively_managed_4 35.561526 135.905618 seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan mixed NoOrthoptera 2016
143 Shinohara Shinohara2019_Intensively_managed_1 35.498448 135.898817 seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan mixed NoOrthoptera 2016
144 Shinohara Shinohara2019_Intensively_managed_2 35.509203 135.907169 seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan mixed NoOrthoptera 2016
145 Shinohara Shinohara2019_Intensively_managed_3 35.489111 135.89987 seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan mixed NoOrthoptera 2016
146 Shinohara Shinohara2019_Intensively_managed_4 35.563264 135.907279 seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan mixed NoOrthoptera 2016
147 Silveira2021 Silveira2021_nonpreserved -16.788637 -43.880394 Neotropical savanna areas in anthropized landscapes, northern Minas Gerais State, Brazil mixed mixed 2019
148 Silveira2021 Silveira2021_preserved -17.358135 -44.259766 Neotropical savanna areas in preserved landscapes, northern Minas Gerais State, Brazil mixed mixed 2019
149 stary stary2008 49.51 14.93 managed forests, Techobuz, Czech sap sucker NoOrthoptera 2007
150 sugiura sugiura2020 26.67 142.98 many endemic organisms: vascular plants and insects, Ogasawara Village, Tokyo metropolitan, Japan mixed NoOrthoptera 2008
151 Sugiura2010 Sugiura2010 27.073072 142.217277 subtropical Ogasawara islands, Japan mixed NoOrthoptera 2008
152 Szpeiner2008 Szpeiner2008 -31.403573 -64.144999 ornemental plants, Cordoba city, Argentina sap sucker NoOrthoptera 2002
153 Tahar2015 Tahar2015 34.834667 5.738862 Biskra province, Algeria sap sucker NoOrthoptera 2014
154 tavakilian tavakilian1997 5.41 -52.97 swamp forests to mixed forests on well-drained soil, Sinnamary River Basin, French Guiana deadwood eater NoOrthoptera 1993
155 tcharntke tcharntke 49 8.32 Karlsruhe, southwest Germany leaf miner NoOrthoptera 1989
156 Todorov2014 Todorov2014 42.157088 24.82175 Experimentally Field of Agriculture University, Agro-ecosystems, Plovdiv Region, Bulgaria sap sucker NoOrthoptera 2006
157 ueckert ueckert 40.17 -103.21 mixed-grass prairie, 27 km north of Akron, Washington county, Colorado leaf chewing Orthoptera 1968
158 Volf2017 Volf2017_Tomakomai 42.716667 141.6 Tomakomai forest, Japan mixed NoOrthoptera 2014
159 Volf2017 Volf2017_Lanzhot 48.8 17.083333 Lanzhot forest, Czech Republic mixed NoOrthoptera 2014
160 Volf2017 Volf2017_Mikulcice 48.683333 16.933333 Mikulcice forest, Czech Republic mixed NoOrthoptera 2014
161 Xi2020 Xi2020 32.763004 102.518629 alpine meadow in the eastern Qinghai-Tibetan Plateau, Hongyuan County, Sichuan Province, China seed predator NoOrthoptera 2016
162 Zhu Zhu2018 23.45 111.883333 subtropical forest, subtropical forest located within Heishiding Nature Reserve, south China, China leaf chewing NoOrthoptera 2014

Binary file not shown.

View file

@ -0,0 +1,115 @@
require("ggplot2")
require("tictoc")
require("tidyverse")
devtools::load_all("R/")
# Importation of data
interaction_data <- read.table(file = "real_data/data/interaction-data.txt", sep = "\t", header = TRUE)
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
names_aggreg_networks <- sapply(
seq_ids_network_aggreg,
function(id) {
paste0(
unique(interaction_data[which(interaction_data$id_network_aggreg == id), ]$web),
collapse = "+"
)
}
)
# Full data
if (!file.exists("real_data/data/dore-matrices.Rds")) {
# Computation of incidence matrices
incidence_matrices <- lapply(
seq_ids_network_aggreg,
function(m) {
current_interaction_data <- interaction_data[which(interaction_data$id_network_aggreg == m), ] %>%
mutate(
plantaggreg = paste(plantorder,
plantfamily, plantgenus, plantspecies,
sep = " "
),
insectaggreg = paste(insectorder,
insectfamily, insectgenus, insectspecies,
sep = " "
)
)
current_interaction_data <- table(current_interaction_data$insectaggreg, current_interaction_data$plantaggreg)
current_incidence_matrix <- matrix(current_interaction_data,
ncol = ncol(current_interaction_data), dimnames = dimnames(current_interaction_data)
)
current_incidence_matrix[which(current_incidence_matrix > 0)] <- 1
return(current_incidence_matrix)
}
)
names(incidence_matrices) <- names_aggreg_networks
saveRDS(incidence_matrices, file = "real_data/data/dore-matrices.Rds")
} else {
incidence_matrices <- readRDS(file = "real_data/data/dore-matrices.Rds")
}
# Emre completed Data
completed_networks_ids <- as.numeric(names(readRDS("real_data/data/Data.rds")))
completed_networks_names <- sapply(
completed_networks_ids,
function(id) {
paste0(
unique(interaction_data[which(interaction_data$id_network_aggreg == id), ]$web),
collapse = "+"
)
}
)
uncompleted <- incidence_matrices[match(completed_networks_names, names(incidence_matrices))]
point_2_completed <- setNames(readRDS("real_data/data/completed0.2.rds"), completed_networks_names)
point_5_completed <- setNames(readRDS("real_data/data/completed0.5.rds"), completed_networks_names)
random_completed <- setNames(readRDS("real_data/data/completedrandom.rds"), completed_networks_names)
if (!exists("arg")) {
arg <- commandArgs(trailingOnly = TRUE)
}
if (identical(arg, character(0))) {
number_of_net <- length(uncompleted)
model <- "iid"
nb_run <- 1
source_data <- eval(as.name("uncompleted"))
data_name <- "uncompleted"
} else {
number_of_net <- as.numeric(arg[1])
model <- arg[2]
nb_run <- as.numeric(arg[3])
source_data <- eval(as.name(arg[4]))
data_name <- arg[4]
}
print(number_of_net)
print(model)
print(nb_run)
print(data_name)
tic()
list_collection <- clusterize_bipartite_networks(
netlist = source_data[1:number_of_net],
net_id = names(source_data)[1:number_of_net],
colsbm_model = model,
nb_run = nb_run,
global_opts = list(
nb_cores = parallel::detectCores() - 1, verbosity = 4,
plot_details = 0
),
silent_parallelization = FALSE
)
toc()
saveRDS(list_collection, file = paste0(
"real_data/data/",
"dore_",
data_name,
"_collection_clustering_nb_run_", nb_run, "_", model, "_",
number_of_net, "_networks_",
format(Sys.time(), "%d-%m-%y-%X"),
".Rds"
))

View file

@ -0,0 +1,90 @@
require("ggplot2")
require("tictoc")
require("tidyverse")
devtools::load_all("R/")
# Importation of data
if (!file.exists("real_data/data/dore-matrices.Rds")) {
interaction_data <- read.table(file = "real_data/data/interaction-data.txt", sep = "\t", header = TRUE)
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
names_aggreg_networks <- sapply(
seq_ids_network_aggreg,
function(id) {
paste0(
unique(interaction_data[which(interaction_data$id_network_aggreg == id), ]$web),
collapse = "+"
)
}
)
# Computation of incidence matrices
incidence_matrices <- lapply(
seq_ids_network_aggreg,
function(m) {
current_interaction_data <- interaction_data[which(interaction_data$id_network_aggreg == m), ] %>%
mutate(
plantaggreg = paste(plantorder,
plantfamily, plantgenus, plantspecies,
sep = "-"
),
insectaggreg = paste(insectorder,
insectfamily, insectgenus, insectspecies,
sep = "-"
)
)
current_interaction_data <- table(current_interaction_data$plantaggreg, current_interaction_data$insectaggreg)
current_incidence_matrix <- matrix(current_interaction_data,
ncol = ncol(current_interaction_data), dimnames = dimnames(current_interaction_data)
)
current_incidence_matrix[which(current_incidence_matrix > 0)] <- 1
return(current_incidence_matrix)
}
)
names(incidence_matrices) <- names_aggreg_networks
saveRDS(incidence_matrices, file = "real_data/data/dore-matrices.Rds")
} else {
incidence_matrices <- readRDS(file = "real_data/data/dore-matrices.Rds")
}
if (!exists("arg")) {
arg <- commandArgs(trailingOnly = TRUE)
}
if (identical(arg, character(0))) {
number_of_net <- length(incidence_matrices)
model <- "pirho"
nb_run <- 3
} else {
number_of_net <- as.numeric(arg[1])
model <- arg[2]
nb_run <- as.numeric(arg[3])
}
print(number_of_net)
print(model)
print(nb_run)
tic()
list_collection <- clusterize_bipartite_networks(
netlist = incidence_matrices[1:number_of_net],
net_id = names(incidence_matrices)[1:number_of_net],
colsbm_model = model,
nb_run = nb_run,
global_opts = list(
nb_cores = parallel::detectCores() - 1, verbosity = 4,
plot_details = 0
),
silent_parallelization = TRUE
)
toc()
saveRDS(list_collection, file = paste0(
"real_data/data/",
"dore_collection_clustering_nb_run", nb_run,"_",model,"_",
number_of_net, "networks_",
format(Sys.time(), "%d-%m-%y-%X"),
".Rds"
))

View file

@ -0,0 +1,76 @@
require("ggplot2")
devtools::load_all("R/")
## Preparing data for analysis
interaction_data <- read.table(file = "real_data/data/interaction-data.txt", sep = "\t", header = TRUE)
insect_orders <- unique(interaction_data$insectorder)
plant_family <- unique(interaction_data$plantfamily)
# All results
iid_clustering <- readRDS("real_data/data/dore_collection_clustering_nb_run1_iid_123networks_24-05-23-21:40:42.Rds")
iid_best_partition <- extract_best_bipartite_partition(iid_clustering)
iid_unlist <- unlist(iid_best_partition)
rho_clustering <- readRDS("real_data/data/dore_collection_clustering_nb_run1_rho_123networks_25-05-23-13:58:30.Rds")
rho_best_partition <- extract_best_bipartite_partition(rho_clustering)
rho_unlist <- unlist(rho_best_partition)
pi_clustering <- readRDS("real_data/data/dore_collection_clustering_nb_run1_pi_123networks_25-05-23-17:31:25.Rds")
pi_best_partition <- extract_best_bipartite_partition(pi_clustering)
pi_unlist <- unlist(pi_best_partition)
pirho_clustering <- readRDS("real_data/data/dore_collection_clustering_nb_run1_pirho_123networks_26-05-23-19:22:55.Rds")
pirho_best_partition <- extract_best_bipartite_partition(pirho_clustering)
pirho_unlist <- unlist(pirho_best_partition)
### Matching taxonomy
taxonomy_in_clusters <- function(unlisted_model) {
lapply(seq_len(length(unlisted_model)), function(col_idx) {
# Per collection
# Empty init
insect_count <- t(sapply(insect_orders, function(order) {
out_count <- rep(0, unlisted_model[[col_idx]]$Q[2])
out_count
}))
plant_count <- t(sapply(plant_family, function(order) {
out_count <- rep(0, unlisted_model[[col_idx]]$Q[1])
out_count
}))
for (m in seq.int(unlisted_model[[col_idx]]$M)) {
#### Insect
insect_names <- names(unlisted_model[[col_idx]]$Z[[1]][[2]])
insect_count <- insect_count + t(sapply(insect_orders, function(order) {
out_count <- rep(0, unlisted_model[[col_idx]]$Q[2])
names(out_count) <- seq.int(unlisted_model[[col_idx]]$Q[2])
insect_count <- table(unlisted_model[[col_idx]]$Z[[m]][[2]][grep(order, insect_names)])
out_count[names(insect_count)] <- insect_count
out_count
}))
#### Plants
plant_names <- names(unlisted_model[[col_idx]]$Z[[1]][[1]])
plant_count <- t(sapply(plant_family, function(order) {
out_count <- rep(0, unlisted_model[[col_idx]]$Q[1])
names(out_count) <- seq.int(unlisted_model[[col_idx]]$Q[1])
plant_count <- table(unlisted_model[[col_idx]]$Z[[m]][[1]][grep(order, plant_names)])
out_count[names(plant_count)] <- plant_count
out_count
}))
}
return(list(insects = insect_count, plants = plant_count))
})
}
taxonomy_remove_empty <- function(taxonomy_collections_list) {
lapply(taxonomy_collections_list, function(collection) {
list(
insects = collection$insects[which(rowSums(collection$insects != 0) > 0), ],
plants = collection$plants[which(rowSums(collection$plants != 0) > 0), ]
)
})
}

View file

@ -0,0 +1,526 @@
```{r, setup, include=FALSE, warning=FALSE}
knitr::opts_chunk$set(echo = FALSE)
```
```{r require_lib, echo = FALSE, include=FALSE, warning=FALSE}
require("tidyverse")
require("knitr")
require("colSBM")
source("temporary_plot.R")
```
```{r pretty_matrix_print, echo = FALSE, warning=FALSE}
# Define a generic method that transforms an object x in a LaTeX string
as_latex <- function(x, ...) {
UseMethod("as_latex", x)
}
# Define a class latex for LaTeX expressions
as_latex.character <- function(x) {
structure(
paste(x, collapse = " "),
class = c("latex", "character")
)
}
# A character string of class latex is rendered in display mode
# Define a knit_print() method for the latex class
knit_print.latex <- function(x, ...) {
knitr::asis_output(
paste0("$$", x, "$$")
)
}
# Now, define a method as_latex for matrix
as_latex.matrix <- function(x, ...) {
as_latex(c(
"\\begin{pmatrix}",
paste(
t(x),
rep(c(rep("&", nrow(x) - 1), "\\\\"), ncol(x)),
collapse = ""
),
"\\end{pmatrix}"
))
}
# Indicate to knitr that matrix are rendered as latex
knit_print.matrix <- function(x, ...) {
knitr::knit_print(as_latex(round(x, 2)))
}
```
```{r better_collection_extraction, echo = FALSE, warning=FALSE}
extract_unlist_reorder <- function(clustering_data_path) {
clustering <- readRDS(clustering_data_path)
if (!is.list(clustering)) {
clustering <- list(clustering)
}
best_partition <- extract_best_bipartite_partition(clustering)
best_partition_unlist <- unlist(best_partition)
if (!is.list(best_partition_unlist)) {
best_partition_unlist <- list(best_partition_unlist)
}
size <- length(best_partition_unlist)
return(setNames(lapply(best_partition_unlist, function(collection) {
reorder_parameters(collection)
}), paste(rep("Collection", size), seq_len(size))))
}
meso_print <- function(unlisted_partition) {
for (idx in seq_along(unlisted_partition)) {
cat(paste("\\subsubsection{Pour la collection", idx, "}"))
print(plot(unlisted_partition[[idx]], type = "meso", mixture = TRUE))
cat("\\newline \\tiny")
print(knitr::kable(unlisted_partition[[idx]]$net_id,
col.names = "Networks",
format = "latex",
position = "!h",
booktabs = TRUE
))
cat("\\normalsize\\newline")
cat(knitr::knit_print(unlisted_partition[[idx]]$alpha))
}
}
alpha_print <- function(unlisted_partition) {
for (idx in seq_along(unlisted_partition)) {
cat(paste("\nPour la collection ", idx, ":\n"))
cat(knitr::knit_print(unlisted_partition[[idx]]$alpha))
}
}
```
```{r taxonomy_functions, echo = FALSE, warning=FALSE}
interaction_data <- read.table(file = "data/interaction-data.txt", sep = "\t", header = TRUE)
insect_orders <- unique(interaction_data$insectorder)
plant_family <- unique(interaction_data$plantorder)
insect_orders[is.na(insect_orders)] <- "NA"
plant_family[is.na(plant_family)] <- "NA"
### Matching taxonomy
taxonomy_in_clusters <- function(unlisted_model) {
if (is.list(unlisted_model)) {
lapply(seq_len(length(unlisted_model)), function(col_idx) {
# Per collection
# Empty init
insect_count <- t(sapply(insect_orders, function(order) {
out_count <- rep(0, unlisted_model[[col_idx]]$Q[2])
out_count
}))
plant_count <- t(sapply(plant_family, function(order) {
out_count <- rep(0, unlisted_model[[col_idx]]$Q[1])
out_count
}))
for (m in seq.int(unlisted_model[[col_idx]]$M)) {
#### Insect
insect_names <- names(unlisted_model[[col_idx]]$Z[[1]][[2]])
insect_count <- insect_count + t(sapply(insect_orders, function(order) {
out_count <- rep(0, unlisted_model[[col_idx]]$Q[2])
names(out_count) <- seq.int(unlisted_model[[col_idx]]$Q[2])
insect_count <- table(unlisted_model[[col_idx]]$Z[[m]][[2]][grep(order, insect_names)])
out_count[names(insect_count)] <- insect_count
out_count
}))
#### Plants
plant_names <- names(unlisted_model[[col_idx]]$Z[[1]][[1]])
plant_count <- t(sapply(plant_family, function(order) {
out_count <- rep(0, unlisted_model[[col_idx]]$Q[1])
names(out_count) <- seq.int(unlisted_model[[col_idx]]$Q[1])
plant_count <- table(unlisted_model[[col_idx]]$Z[[m]][[1]][grep(order, plant_names)])
out_count[names(plant_count)] <- plant_count
out_count
}))
}
return(list(insects = insect_count, plants = plant_count))
})
} else {
# Per collection
# Empty init
insect_count <- t(sapply(insect_orders, function(order) {
out_count <- rep(0, unlisted_model$Q[2])
out_count
}))
plant_count <- t(sapply(plant_family, function(order) {
out_count <- rep(0, unlisted_model$Q[1])
out_count
}))
for (m in seq.int(unlisted_model$M)) {
#### Insect
insect_names <- names(unlisted_model$Z[[1]][[2]])
insect_count <- insect_count + t(sapply(insect_orders, function(order) {
out_count <- rep(0, unlisted_model$Q[2])
names(out_count) <- seq.int(unlisted_model$Q[2])
insect_count <- table(unlisted_model$Z[[m]][[2]][grep(order, insect_names)])
out_count[names(insect_count)] <- insect_count
out_count
}))
#### Plants
plant_names <- names(unlisted_model$Z[[1]][[1]])
plant_count <- t(sapply(plant_family, function(order) {
out_count <- rep(0, unlisted_model$Q[1])
names(out_count) <- seq.int(unlisted_model$Q[1])
plant_count <- table(unlisted_model$Z[[m]][[1]][grep(order, plant_names)])
out_count[names(plant_count)] <- plant_count
out_count
}))
}
return(list(list(insects = insect_count, plants = plant_count)))
}
}
taxonomy_remove_empty <- function(taxonomy_collections_list) {
lapply(taxonomy_collections_list, function(collection) {
list(
insects = collection$insects[which(rowSums(collection$insects != 0) > 0), ],
plants = collection$plants[which(rowSums(collection$plants != 0) > 0), ]
)
})
}
get_formatted_data <- function(collection, group, max_rank = 6) {
collection[[group]] %>%
as.data.frame() %>% # Transformation en data frame
mutate(
Total = rowSums(.),
Rang = rank(-Total, ties.method = "min")
) %>% # Creation d'une colonne Total
rownames_to_column(var = "TaxonBrut") %>%
mutate(Taxon = ifelse(Rang <= max_rank & Total > 0, TaxonBrut, "Other")) %>%
arrange(Rang) %>%
mutate(Taxon = factor(Taxon, levels = unique(Taxon))) %>%
select(-Total, -TaxonBrut, -Rang) %>%
pivot_longer(
cols = -c("Taxon"),
names_to = "Bloc",
values_to = "Nombre",
names_prefix = "V"
) %>%
group_by(Taxon, Bloc) %>%
summarise_all(sum) %>%
ungroup() %>%
group_by(Bloc) %>%
mutate(Proportion = Nombre / sum(Nombre)) %>%
ungroup() %>%
mutate(Group = group)
}
taxonomy_plot <- function(data, insects_or_plants, model, stack_or_fill) {
plots <- filter(data, Group == insects_or_plants) %>%
ggplot(aes(x = Bloc, y = Nombre, fill = Taxon)) +
geom_bar(stat = "identity", position = stack_or_fill) +
labs(x = "Block", y = "Number of Nodes", fill = "Taxonomy") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
ggtitle(paste(
ifelse(insects_or_plants == "insects",
"Pollinators", "Plants"
), "repartition (",
ifelse(stack_or_fill == "stack", "absolute", "proportion"),
") in the", model, "clustering"
)) +
facet_wrap(~Collection, ncol = 3, scales = "free_x")
# Arrange the plots in a grid layout
gridExtra::grid.arrange(plots, newpage = TRUE)
}
```
```{r load data, echo = FALSE, include = FALSE, warning=FALSE}
# All results
iid_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_iid_123networks_24-05-23-21:40:42.Rds")
rho_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_rho_123networks_25-05-23-13:58:30.Rds")
pi_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_pi_123networks_25-05-23-17:31:25.Rds")
pirho_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_pirho_123networks_26-05-23-19:22:55.Rds")
```
# Application to \cite{doreRelativeEffectsAnthropogenic2021} data
\label{sec:application-to-dorerelativeeffectsanthropogenic2021-data}
## Clustering with model iid
With the *iid-colBiSBM* we obtain `r length(iid_unlist)` collections with the
following structures:
```{r iid_meso_plot, echo = FALSE, message=FALSE, results="asis", warning=FALSE}
#| fig.cap=paste(names(iid_unlist), rep("- iid",length(iid_unlist))),
#| fig.asp = 0.5,
#| dpi = 300
meso_print(iid_unlist)
```
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
```{r iid_alpha, echo = FALSE, results="asis", warning=FALSE}
alpha_print(iid_unlist)
```
### Comparaison avec des infos supplémentaires
```{r supinfo, echo = FALSE}
supinfo <- readxl::read_xlsx("data/supinfo.xlsx", sheet = 2)
interaction_data <- read.table(file = "data/interaction-data.txt", sep = "\t", header = TRUE)
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
incidence_matrices <- readRDS(file = "data/dore-matrices.Rds")
names_aggreg_networks <- names(incidence_matrices)
vectorClusteringNet <- numeric(nrow(supinfo))
for (k in 1:length(iid_unlist)) {
idclust <- match(iid_unlist[[k]]$net_id, names_aggreg_networks)
supinfoclust <- match(seq_ids_network_aggreg[idclust], supinfo$Idweb)
vectorClusteringNet[supinfoclust] <- k
}
```
```{r Annual_timespan_plot, echo = FALSE}
ggplot(supinfo) +
aes(
y = Annual_time_span,
x = vectorClusteringNet, group = vectorClusteringNet, fill = as.factor(vectorClusteringNet)
) +
xlab("Numéro de collection") +
ylab("Annual time span") +
guides(fill = guide_legend(title = "Numéro\nde collection")) +
geom_boxplot()
```
### Répartition dans les clusters selon la taxonomie
```{r iid_taxonomy, echo = FALSE, warning=FALSE}
iid_taxonomy <- taxonomy_in_clusters(iid_unlist)
iid_taxonomy_long <- map_dfr(iid_taxonomy,
function(collection) {
map_dfr(
c("insects", "plants"),
function(group) {
get_formatted_data(collection, group)
}
)
},
.id = "Collection"
)
```
```{r iid_plot_taxonomy_pollinators, echo = FALSE, message = FALSE,fig.cap = 'Pollinators repartition for the iid model regarding taxonomy', warning=FALSE}
# Pollinators
taxonomy_plot(
data = iid_taxonomy_long,
insects_or_plants = "insects",
stack_or_fill = "stack", model = "iid"
)
taxonomy_plot(
data = iid_taxonomy_long,
insects_or_plants = "insects",
stack_or_fill = "fill", model = "iid"
)
```
```{r iid_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the iid model regarding taxonomy', warning=FALSE}
taxonomy_plot(
data = iid_taxonomy_long,
insects_or_plants = "plants",
stack_or_fill = "stack", model = "iid"
)
taxonomy_plot(
data = iid_taxonomy_long,
insects_or_plants = "plants",
stack_or_fill = "fill", model = "iid"
)
```
#### Tables
```{r iid_taxo_tables, echo = FALSE}
iid_taxonomy_long %>%
filter(Group == "insects") %>%
group_by(Taxon) %>%
mutate(row_id = row_number()) %>%
pivot_wider(names_from = c(Collection, Bloc), values_from = Nombre, names_glue = "Collection_{Collection}_Bloc_{Bloc}") %>%
ungroup() %>%
select(-row_id) %>%
group_by(Taxon) %>%
summarize(across(starts_with("Collection"), ~ na.omit(.)[1])) %>%
ungroup()
```
## Clustering with model pi
Avec le modèle *pi* nous obtenons les `r length(pi_unlist)` collections et
les structures suivantes:
```{r pi_meso_plot, echo = FALSE, message=FALSE, fig.cap=paste(names(pi_unlist), rep("- pi",length(pi_unlist))), results="asis", warning=FALSE}
meso_print(pi_unlist)
```
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
```{r pi_alpha, echo = FALSE, results="asis", warning=FALSE}
alpha_print(pi_unlist)
```
### Répartition dans les clusters selon la taxonomie
```{r pi_taxonomy, echo = FALSE, warning=FALSE}
pi_taxonomy <- taxonomy_in_clusters(pi_unlist)
pi_taxonomy_long <- map_dfr(pi_taxonomy,
function(collection) {
map_dfr(
c("insects", "plants"),
function(group) {
get_formatted_data(collection, group)
}
)
},
.id = "Collection"
)
```
```{r pi_plot_taxonomy_pollinators, echo = FALSE, message = FALSE,fig.cap = 'Pollinators repartition for the pi model regarding taxonomy', warning=FALSE}
# Pollinators
taxonomy_plot(
data = pi_taxonomy_long,
insects_or_plants = "insects",
stack_or_fill = "stack", model = "pi"
)
taxonomy_plot(
data = pi_taxonomy_long,
insects_or_plants = "insects",
stack_or_fill = "fill", model = "pi"
)
```
```{r pi_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the pi model regarding taxonomy', warning=FALSE}
taxonomy_plot(
data = pi_taxonomy_long,
insects_or_plants = "plants",
stack_or_fill = "stack", model = "pi"
)
taxonomy_plot(
data = pi_taxonomy_long,
insects_or_plants = "plants",
stack_or_fill = "fill", model = "pi"
)
```
## Clustering with model rho
Avec le modèle *rho* nous obtenons les `r length(rho_unlist)` collections et
les structures suivantes:
```{r rho_meso_plot, echo = FALSE, message=FALSE, fig.cap=paste(names(rho_unlist), rep("- rho",length(rho_unlist))), results="asis", warning=FALSE}
meso_print(rho_unlist)
```
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
```{r rho_alpha, echo = FALSE, results="asis", warning=FALSE}
alpha_print(rho_unlist)
```
### Répartition dans les clusters selon la taxonomie
```{r rho_taxonomy, echo = FALSE, warning=FALSE}
rho_taxonomy <- taxonomy_in_clusters(rho_unlist)
rho_taxonomy_long <- map_dfr(rho_taxonomy,
function(collection) {
map_dfr(
c("insects", "plants"),
function(group) {
get_formatted_data(collection, group)
}
)
},
.id = "Collection"
)
```
```{r rho_plot_taxonomy_pollinators, echo = FALSE, message = FALSE,fig.cap = 'Pollinators repartition for the rho model regarding taxonomy', warning=FALSE}
# Pollinators
taxonomy_plot(
data = rho_taxonomy_long,
insects_or_plants = "insects",
stack_or_fill = "stack", model = "rho"
)
taxonomy_plot(
data = rho_taxonomy_long,
insects_or_plants = "insects",
stack_or_fill = "fill", model = "rho"
)
```
```{r rho_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the rho model regarding taxonomy', warning=FALSE}
taxonomy_plot(
data = rho_taxonomy_long,
insects_or_plants = "plants",
stack_or_fill = "stack", model = "rho"
)
taxonomy_plot(
data = rho_taxonomy_long,
insects_or_plants = "plants",
stack_or_fill = "fill", model = "rho"
)
```
## Clustering with model pirho
Avec le modèle *pirho* nous obtenons les `r length(pirho_unlist)` collections et
les structures suivantes:
```{r pirho_meso_plot, echo = FALSE, message=FALSE, fig.cap=paste(names(pirho_unlist), rep("- pirho",length(pirho_unlist))), results="asis", warning=FALSE}
meso_print(pirho_unlist)
```
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
```{r pirho_alpha, echo = FALSE, results="asis", warning=FALSE}
alpha_print(pirho_unlist)
```
### Répartition dans les clusters selon la taxonomie
```{r pirho_taxonomy, echo = FALSE, warning=FALSE}
pirho_taxonomy <- taxonomy_in_clusters(pirho_unlist)
pirho_taxonomy_long <- map_dfr(pirho_taxonomy,
function(collection) {
map_dfr(
c("insects", "plants"),
function(group) {
get_formatted_data(collection, group)
}
)
},
.id = "Collection"
)
```
```{r pirho_plot_taxonomy_pollinators, echo = FALSE, message = FALSE, fig.cap = 'Pollinators repartition for the pirho model regarding taxonomy', warning=FALSE}
# Pollinators
taxonomy_plot(
data = pirho_taxonomy_long,
insects_or_plants = "insects",
stack_or_fill = "stack", model = "pirho"
)
taxonomy_plot(
data = pirho_taxonomy_long,
insects_or_plants = "insects",
stack_or_fill = "fill", model = "pirho"
)
```
```{r pirho_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the pirho model regarding taxonomy', warning=FALSE}
taxonomy_plot(
data = pirho_taxonomy_long,
insects_or_plants = "plants",
stack_or_fill = "stack", model = "pirho"
)
taxonomy_plot(
data = pirho_taxonomy_long,
insects_or_plants = "plants",
stack_or_fill = "fill", model = "pirho"
)
```

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,52 @@
@misc{anakokDisentanglingStructureEcological2022,
title = {Disentangling the Structure of Ecological Bipartite Networks from Observation Processes},
author = {Anakok, Emre and Barbillon, Pierre and Fontaine, Colin and Thebault, Elisa},
year = {2022},
month = nov,
number = {arXiv:2211.16364},
eprint = {2211.16364},
primaryclass = {stat},
publisher = {{arXiv}},
urldate = {2023-06-14},
abstract = {The structure of a bipartite interaction network can be described by providing a clustering for each of the two types of nodes. Such clusterings are outputted by fitting a Latent Block Model (LBM) on an observed network that comes from a sampling of species interactions in the field. However, the sampling is limited and possibly uneven. This may jeopardize the fit of the LBM and then the description of the structure of the network by detecting structures which result from the sampling and not from actual underlying ecological phenomena. If the observed interaction network consists of a weighted bipartite network where the number of observed interactions between two species is available, the sampling efforts for all species can be estimated and used to correct the LBM fit. We propose to combine an observation model that accounts for sampling and an LBM for describing the structure of underlying possible ecological interactions. We develop an original inference procedure for this model, the efficiency of which is demonstrated in simulation studies. The practical interest in ecology of our model is highlighted on a large dataset of plant-pollinator network.},
archiveprefix = {arxiv},
langid = {english},
keywords = {Statistics - Methodology},
file = {/home/polarolouis/Zotero/storage/LQ3FINZG/Anakok et al. - 2022 - Disentangling the structure of ecological bipartit.pdf}
}
@article{celisseConsistencyMaximumlikelihoodVariational2012,
title = {Consistency of Maximum-Likelihood and Variational Estimators in the Stochastic Block Model},
author = {Celisse, Alain and Daudin, Jean-Jacques and Pierre, Laurent},
year = {2012},
month = jan,
journal = {Electronic Journal of Statistics},
volume = {6},
number = {none},
pages = {1847--1899},
publisher = {{Institute of Mathematical Statistics and Bernoulli Society}},
issn = {1935-7524, 1935-7524},
doi = {10.1214/12-EJS729},
urldate = {2023-06-06},
abstract = {The stochastic block model (SBM) is a probabilistic model designed to describe heterogeneous directed and undirected graphs. In this paper, we address the asymptotic inference in SBM by use of maximum-likelihood and variational approaches. The identifiability of SBM is proved while asymptotic properties of maximum-likelihood and variational estimators are derived. In particular, the consistency of these estimators is settled for the probability of an edge between two vertices (and for the group proportions at the price of an additional assumption), which is to the best of our knowledge the first result of this type for variational estimators in random graphs.},
keywords = {62E17,62G05,62G20,62H30,Concentration inequalities,consistency,maximum likelihood estimators,Random graphs,Stochastic block model,variational estimators},
file = {/home/polarolouis/Zotero/storage/JNWRIYKG/celisse2012.pdf.pdf;/home/polarolouis/Zotero/storage/XG463B5I/Celisse et al. - 2012 - Consistency of maximum-likelihood and variational .pdf}
}
@misc{chabert-liddellLearningCommonStructures2023,
type = {Article},
title = {Learning Common Structures in a Collection of Networks. {{An}} Application to Food Webs},
author = {{Chabert-Liddell}, Saint-Clair and Barbillon, Pierre and Donnet, Sophie},
year = {2023},
month = mar,
number = {arXiv:2206.00560},
eprint = {2206.00560},
primaryclass = {stat},
publisher = {{arXiv}},
doi = {10.48550/arXiv.2206.00560},
urldate = {2023-05-22},
abstract = {Let a collection of networks represent interactions within several (social or ecological) systems. We pursue two objectives: identifying similarities in the topological structures that are held in common between the networks and clustering the collection into sub-collections of structurally homogeneous networks. We tackle these two questions with a probabilistic model based approach. We propose an extension of the Stochastic Block Model (SBM) adapted to the joint modeling of a collection of networks. The networks in the collection are assumed to be independent realizations of SBMs. The common connectivity structure is imposed through the equality of some parameters. The model parameters are estimated with a variational Expectation-Maximization (EM) algorithm. We derive an ad-hoc penalized likelihood criterion to select the number of blocks and to assess the adequacy of the consensus found between the structures of the different networks. This same criterion can also be used to cluster networks on the basis of their connectivity structure. It thus provides a partition of the collection into subsets of structurally homogeneous networks. The relevance of our proposition is assessed on two collections of ecological networks. First, an application to three stream food webs reveals the homogeneity of their structures and the correspondence between groups of species in different ecosystems playing equivalent ecological roles. Moreover, the joint analysis allows a finer analysis of the structure of smaller networks. Second, we cluster 67 food webs according to their connectivity structures and demonstrate that five mesoscale structures are sufficient to describe this collection.},
archiveprefix = {arxiv},
keywords = {Statistics - Applications,Statistics - Methodology},
file = {/home/polarolouis/Zotero/storage/M74TXGCF/Chabert-Liddell et al. - 2023 - Learning common structures in a collection of netw.pdf;/home/polarolouis/Zotero/storage/A35M8KNP/2206.html}
}

View file

@ -0,0 +1,197 @@
#' The function to plot the fitBipartite objects
#' @importFrom patchwork
#' @importFrom reshape2
#' @importFrom purrr
plotFitBipartite <- function(model, type = "graphon", oRow = NULL, oCol = NULL, mixture = FALSE, net_id = NULL, ...) {
# The below order use mean over all networks to have a consistent display
if (is.null(oRow)) {
if (model$Q[2] == 1) {
mean_rho <- 1
} else {
mean_rho <- matrixStats::rowMeans2(sapply(model$pi, function(pi) pi[[2]]))
}
oRow <- order(model$alpha %*% mean_rho, decreasing = TRUE)
}
if (is.null(oCol)) {
if (model$Q[1] == 1) {
mean_pi <- 1
} else {
mean_pi <- matrixStats::rowMeans2(sapply(model$pi, function(pi) pi[[1]]))
}
oCol <- order(mean_pi %*% model$alpha, decreasing = TRUE)
}
p <- switch(type,
graphon = {
(model$alpha[oRow, oCol] * mean(model$delta)) %>%
t() %>%
reshape2::melt() %>%
dplyr::mutate(
xmin = rep(c(0, cumsum(model$pi[[net_id]][[2]][oCol][1:(model$Q[2] - 1)])), model$Q[1]),
ymin = rep(c(0, cumsum(model$pi[[net_id]][[1]][oRow][1:(model$Q[1] - 1)])), each = model$Q[2]),
xmax = rep(cumsum(model$pi[[net_id]][[2]][oCol]), model$Q[1]),
ymax = rep(cumsum(model$pi[[net_id]][[1]][oRow]), each = model$Q[2])
) %>%
ggplot2::ggplot(ggplot2::aes(
xmin = xmin, ymin = ymin,
xmax = xmax, ymax = ymax, fill = value
)) +
ggplot2::geom_rect() +
ggplot2::scale_fill_gradient2("alpha", low = "white", mid = "red", midpoint = 1) +
ggplot2::geom_hline(yintercept = cumsum(model$pi[[net_id]][[1]][oRow][1:(model$Q[1] - 1)]), size = .2) +
ggplot2::geom_vline(xintercept = cumsum(model$pi[[net_id]][[2]][oCol][1:(model$Q[2] - 1)]), size = .2) +
ggplot2::scale_y_reverse() +
ggplot2::theme_bw(base_size = 15, base_rect_size = 1, base_line_size = 1) +
ggplot2::xlab("Column Blocks") +
ggplot2::ylab("Row Blocks") +
ggplot2::coord_equal(expand = FALSE)
},
meso = {
p_alpha <- model$alpha[oRow, oCol] %>%
t() %>%
reshape2::melt() %>%
ggplot2::ggplot(ggplot2::aes(x = Var1, y = Var2, fill = value)) +
ggplot2::geom_tile() +
ggplot2::scale_fill_gradient2("alpha", low = "white", high = "red") +
ggplot2::geom_hline(yintercept = seq(model$Q[1]) + .5) +
ggplot2::geom_vline(xintercept = seq(model$Q[2]) + .5) +
ggplot2::scale_y_reverse() +
ggplot2::theme_bw(base_size = 15, base_rect_size = 1, base_line_size = 1) +
ggplot2::xlab("") +
ggplot2::ylab("") +
ggplot2::coord_fixed(expand = FALSE)
# scale_y_reverse()
if (model$free_density) {
xl <- paste(round(model$delta, 1))
} else {
xl <- ""
}
df_pi <- purrr::map_dfc(
seq_along(model$net_id),
function(m) setNames(data.frame(model$pim[[m]][[1]][oRow]), m)
)
df_rho <- purrr::map_dfc(
seq_along(model$net_id),
function(m) setNames(data.frame(model$pim[[m]][[2]][oCol]), m)
)
# names(df_pi) <- model$net_id
if (mixture) {
p_pi <-
df_pi %>%
# rename() %>%
dplyr::mutate(q = seq(model$Q[1])) %>%
tidyr::pivot_longer(cols = -c(q)) %>%
mutate(Proportion = value) %>%
ggplot2::ggplot(ggplot2::aes(
fill = as.factor(q), y = name,
x = Proportion
)) +
ggplot2::geom_col() +
ggplot2::coord_flip(expand = FALSE) +
ggplot2::scale_fill_brewer("Row block",
type = "qual", palette = "Paired",
direction = -1
) +
ggplot2::guides(fill = ggplot2::guide_legend(
ncol = model$Q[1] %/% 3 + 1,
byrow = TRUE
)) +
ggplot2::ylab("") +
ggplot2::ylab(xl) +
ggplot2::theme_bw(base_size = 15)
p_rho <- df_rho %>%
# rename() %>%
dplyr::mutate(q = seq(model$Q[2])) %>%
tidyr::pivot_longer(cols = -c(q)) %>%
mutate(Proportion = value) %>%
ggplot2::ggplot(ggplot2::aes(
fill = as.factor(q), y = name,
x = Proportion
)) +
ggplot2::geom_col() +
# ggplot2::coord_flip(expand = FALSE) +
ggplot2::scale_fill_brewer("Column block",
type = "qual", palette = "Set2",
direction = -1
) +
ggplot2::guides(fill = ggplot2::guide_legend(
ncol = model$Q[2] %/% 3 + 1,
byrow = TRUE
)) +
ggplot2::ylab("") +
ggplot2::ylab(xl) +
ggplot2::theme_bw(base_size = 15)
# Merging the plots with patchwork
mixture_layout <- "
##CCCC
##CCCC
RRAAAA
RRAAAA
RRAAAA
"
p_alpha <- patchwork::wrap_plots(
R = p_pi, C = p_rho, A = p_alpha,
design = mixture_layout
) +
patchwork::plot_layout(
guides = "collect",
design = mixture_layout
)
}
return(p_alpha)
},
"block" = {
as.matrix(model$A[[net_id]])[
order(model$Z[[net_id]][[1]]),
order(model$Z[[net_id]][[2]])
] %>%
reshape2::melt() %>%
ggplot2::ggplot(ggplot2::aes(x = Var2, y = rev(Var1), fill = value)) +
ggplot2::geom_tile(show.legend = FALSE) +
ggplot2::geom_hline(
yintercept = cumsum(tabulate(model$Z[[net_id]][[1]])[model$Q[1]:2]) + .5,
col = "red", size = .5
) +
ggplot2::geom_vline(
xintercept = cumsum(tabulate(model$Z[[net_id]][[2]])[1:(model$Q[2] - 1)]) + .5,
col = "red", size = .5
) +
ggplot2::scale_fill_gradient(low = "white", high = "black", na.value = "transparent") +
ggplot2::ylab("") +
ggplot2::xlab(model$net_id[net_id]) +
ggplot2::scale_x_discrete(
breaks = ""
) +
# ggplot2::scale_y_reverse() +
ggplot2::scale_y_discrete(
breaks = "",
guide = ggplot2::guide_axis(angle = 0)
) +
ggplot2::coord_equal(expand = FALSE) +
ggplot2::theme_bw(base_size = 15) +
ggplot2::theme(axis.ticks = ggplot2::element_blank())
}
)
return(p)
}
#' Plot matrix summaries of the collection mesoscale structure
#'
#' @param x a fitBipartiteSBMPop object.
#' @param type The type of the plot. Could be "graphon", "meso" or "block".
#' @param ord A reordering of the blocks.
#' @param mixture Should the block proportions of each network be plotted as
#' well?
#' @param net_id Use to plot only on network in "graphon" view.
#' @param ... Further argument to be passed
#' @return A plot, a ggplot2 object.
#' @export
#'
#' @examples
plot.fitBipartiteSBMPop <- function(x, type = "graphon", oRow = NULL, oCol = NULL, mixture = FALSE, net_id = 1, ...) {
stopifnot(inherits(x, "fitBipartiteSBMPop"))
p <- plotFitBipartite(x,
type = type, oRow = oRow, oCol = oCol, mixture = mixture,
net_id = net_id, ...
)
p
}

View file

@ -0,0 +1,178 @@
---
title: "Analyzing the capacity of the colBiSBM to recover structure for missing data from other networks"
output:
html_document:
toc: true
theme: journal
pdf_document:
keep_tex: true
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r required_libs, echo = FALSE, include=FALSE}
require("ggplot2")
require("tidyverse")
```
```{r import_data, echo=FALSE, include=FALSE}
NA_robustness_raw <- readRDS("simulation/data/NA_robustness_results-alpha_0.7, 0.4, 0.3, 0.4, 0.2, 0.05, 0.3, 0.05, 0.05-reps-10-14-06-23_17-41.Rds")
NA_robustness_df <- NA_robustness_raw %>%
mutate(
auc_diff = auc_colBiSBM - auc_LBM,
ari_row_diff = NA_net_ari_row - LBM_ari_row,
ari_col_diff = NA_net_ari_col - LBM_ari_col
) %>%
group_by(prop_NAs, model) %>%
summarise(
mean_auc_diff = mean(auc_diff),
sd_auc_diff = sd(auc_diff),
mean_ari_row_diff = mean(ari_row_diff),
sd_ari_row_diff = sd(ari_row_diff),
mean_ari_col_diff = mean(ari_col_diff),
sd_ari_col_diff = sd(ari_col_diff),
mean_LBM_ari_row = mean(LBM_ari_row),
sd_LBM_ari_row = sd(LBM_ari_row),
mean_LBM_ari_col = mean(LBM_ari_col),
sd_LBM_ari_col = sd(LBM_ari_col),
mean_NA_net_ari_row = mean(NA_net_ari_row),
sd_NA_net_ari_row = sd(NA_net_ari_row),
mean_NA_net_ari_col = mean(NA_net_ari_col),
sd_NA_net_ari_col = sd(NA_net_ari_col),
mean_elapsed_secs = mean(elapsed_secs),
sd_elapsed_secs = sd(elapsed_secs)
) %>%
ungroup()
```
```{r useful_function, echo = FALSE}
write_matex2 <- function(x) {
if (!is.matrix(x)) {
x <- matrix(x)
}
begin <- "\\begin{bmatrix}"
end <- "\\end{bmatrix}"
X <-
apply(x, 1, function(x) {
paste(
paste(x, collapse = "&"),
"\\\\"
)
})
paste(c(begin, X, end), collapse = "")
}
```
# Simulation context
The idea is to benchmark the capacity of the models when NAs are in the data.
To do this, we chose the below structure:
```{r simulation_parameters, echo = FALSE}
eps <- 0.05
M <- 3
# Defining parameters
nr <- 100
nc <- 150
pir <- c(0.5, 0.3, 0.2)
pic <- c(0.5, 0.3, 0.2)
alpha <- matrix(c(
0.7, 0.4, 0.3,
0.4, 0.2, eps,
0.3, eps, eps
), byrow = TRUE, nrow = length(pir), ncol = length(pic))
```
$$M = `r M`, n_r = `r nr`, n_c = `r nc` \\ \alpha = `r write_matex2(alpha)`
\\ \pi = `r write_matex2(pir)` \rho = `r write_matex2(pic)`$$
With $M$ the number of networks, $n_r$ the number of nodes in row of the incidence
matrix, $n_c$ the number of nodes in column, $\alpha$ the connectivity
parameters between the row and column clusters. $\pi$ and $\rho$ are
the proportion of nodes in the row and columns clusters.
And set some randomly chosen interactions to NA. The below plots will show the
different quality indicators in function of proportion of NAs in the first of
the 3 networks.
# AUC in function of the proportion of NAs
```{r auc_plots, echo = FALSE}
auc_plot <- NA_robustness_df %>% ggplot() +
geom_ribbon(aes(ymin = mean_auc_diff - sd_auc_diff, ymax = mean_auc_diff + sd_auc_diff, x = prop_NAs, fill = model), alpha = 0.1) +
geom_line(aes(x = prop_NAs, y = mean_auc_diff, color = model)) +
geom_point(aes(x = prop_NAs, y = mean_auc_diff, color = model)) +
xlab("NA proportion") +
ylab("AUC difference (colBiSBM - LBM)") +
scale_x_continuous(breaks = seq(0, 0.9, 0.1))
auc_plot
```
```{r auc_, echo = FALSE}
auc_plot <- NA_robustness_df %>% ggplot() +
geom_ribbon(aes(ymin = mean_auc_diff - sd_auc_diff, ymax = mean_auc_diff + sd_auc_diff, x = prop_NAs, fill = model), alpha = 0.1) +
geom_line(aes(x = prop_NAs, y = mean_auc_diff, color = model)) +
geom_point(aes(x = prop_NAs, y = mean_auc_diff, color = model)) +
xlab("NA proportion") +
ylab("AUC difference (colBiSBM - LBM)") +
scale_x_continuous(breaks = seq(0, 0.9, 0.1))
auc_plot
```
<!-- FAUX
At $0$, there is no NAs in the $1^{st}$ network so the information is completely
retrieved.
At $0.1$, the AUC is degraded and falls around 0.85 for the *iid, pi and rho*
models. The *pirho* model seems to perform slightly better, but it's confidence interval
intersects the others.
For the next values, two "behaviours" can be observed :
- the *iid* decreases and seems to stabilize until 0.8.
- the *pi, rho and pirho* seem to remain stable until 0.7, from 0.8 they go down.
But the *pirho* performs overall the best, staying around 0.86 and with $90\%$
NAs in ther first network it gives an AUC of 0.83 !
The *pi* and *rho* models perform the same (their confidence intervals overlap),
and they give an AUC of around 0.83 between $20\%$ to $80\%$ of NAs. The *rho*
ends at 0.8 AUC, where the *pi* ends at 0.76.
The 4 models maintain an AUC over 0.75 for $90%$ -->
# ARI in function of the proportion of NAs
```{r ARI_row_plot, echo = FALSE, fig.cap="Difference of ARI for the row clusterings"}
ari_row_plot <- NA_robustness_df %>% ggplot() +
# ylim(-1, 1) +
geom_ribbon(aes(ymin = mean_ari_row_diff - sd_ari_row_diff, ymax = mean_ari_row_diff + sd_ari_row_diff, x = prop_NAs, fill = model), alpha = 0.25) +
geom_line(aes(x = prop_NAs, y = mean_ari_row_diff, color = model)) +
geom_point(aes(x = prop_NAs, y = mean_ari_row_diff, color = model)) +
xlab("NA proportion") +
ylab("ARI difference") +
ggtitle("ARI on the row clustering, difference (colBiSBM - LBM)") +
scale_x_continuous(breaks = seq(0, 0.9, 0.1))
ari_row_plot
```
```{r ARI_col_plot, echo = FALSE, fig.cap="Difference of ARI for the columns clusterings"}
ari_col_plot <- NA_robustness_df %>% ggplot() +
# ylim(-1, 1) +
geom_ribbon(aes(ymin = mean_ari_col_diff - sd_ari_col_diff, ymax = mean_ari_col_diff + sd_ari_col_diff, x = prop_NAs, fill = model), alpha = 0.25) +
geom_line(aes(x = prop_NAs, y = mean_ari_col_diff, color = model)) +
geom_point(aes(x = prop_NAs, y = mean_ari_col_diff, color = model)) +
xlab("NA proportion") +
ylab("ARI difference") +
ggtitle("ARI on the column clustering, difference (colBiSBM - LBM)") +
scale_x_continuous(breaks = seq(0, 0.9, 0.1))
ari_col_plot
```

View file

@ -0,0 +1,110 @@
% Options for packages loaded elsewhere
\PassOptionsToPackage{unicode}{hyperref}
\PassOptionsToPackage{hyphens}{url}
%
\documentclass[
]{article}
\usepackage{lmodern}
\usepackage{amssymb,amsmath}
\usepackage{ifxetex,ifluatex}
\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{textcomp} % provide euro and other symbols
\else % if luatex or xetex
\usepackage{unicode-math}
\defaultfontfeatures{Scale=MatchLowercase}
\defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1}
\fi
% Use upquote if available, for straight quotes in verbatim environments
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\IfFileExists{microtype.sty}{% use microtype if available
\usepackage[]{microtype}
\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
}{}
\makeatletter
\@ifundefined{KOMAClassName}{% if non-KOMA class
\IfFileExists{parskip.sty}{%
\usepackage{parskip}
}{% else
\setlength{\parindent}{0pt}
\setlength{\parskip}{6pt plus 2pt minus 1pt}}
}{% if KOMA class
\KOMAoptions{parskip=half}}
\makeatother
\usepackage{xcolor}
\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available
\IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}}
\hypersetup{
pdftitle={Analyzing the capacity of the colBiSBM to recover structure for missing data from other networks},
hidelinks,
pdfcreator={LaTeX via pandoc}}
\urlstyle{same} % disable monospaced font for URLs
\usepackage[margin=1in]{geometry}
\usepackage{graphicx}
\makeatletter
\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
\makeatother
% Scale images if necessary, so that they will not overflow the page
% margins by default, and it is still possible to overwrite the defaults
% using explicit options in \includegraphics[width, height, ...]{}
\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
% Set default figure placement to htbp
\makeatletter
\def\fps@figure{htbp}
\makeatother
\setlength{\emergencystretch}{3em} % prevent overfull lines
\providecommand{\tightlist}{%
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
\setcounter{secnumdepth}{-\maxdimen} % remove section numbering
\title{Analyzing the capacity of the colBiSBM to recover structure for
missing data from other networks}
\author{}
\date{\vspace{-2.5em}}
\begin{document}
\maketitle
\begin{verbatim}
## `summarise()` has grouped output by 'prop_NAs'. You can override using the
## `.groups` argument.
\end{verbatim}
\hypertarget{simulation-context}{%
\section{Simulation context}\label{simulation-context}}
The idea is to benchmark the capacity of the models when NAs are in the
data.
To do this, whe choose the below structure: ! PARAMETERS OF THE
SIMULATION !
And set some randomly chosen interactions to NA. The below plots will
show the different quality indicators in function of proportion of NAs
in the first of the 3 networks.
\hypertarget{auc-in-function-of-the-proportion-of-nas}{%
\section{AUC in function of the proportion of
NAs}\label{auc-in-function-of-the-proportion-of-nas}}
\includegraphics{NA_robustness_analyse_files/figure-latex/auc_plots-1.pdf}
\hypertarget{ari-in-function-of-the-proportion-of-nas}{%
\section{ARI in function of the proportion of
NAs}\label{ari-in-function-of-the-proportion-of-nas}}
\begin{figure}
\centering
\includegraphics{NA_robustness_analyse_files/figure-latex/ARI_row_plot-1.pdf}
\caption{Difference of ARI for the row clusterings}
\end{figure}
\begin{figure}
\centering
\includegraphics{NA_robustness_analyse_files/figure-latex/ARI_col_plot-1.pdf}
\caption{Difference of ARI for the columns clusterings}
\end{figure}
\end{document}

View file

@ -0,0 +1,153 @@
devtools::load_all()
require("sbm")
require("pROC")
set.seed(1234)
eps <- 0.05
M <- 3
# Defining parameters
nr <- 100
nc <- 150
pir <- c(0.5, 0.3, 0.2)
pic <- c(0.5, 0.3, 0.2)
alpha <- matrix(c(
0.7, 0.4, 0.3,
0.4, 0.2, eps,
0.3, eps, eps
), byrow = TRUE, nrow = length(pir), ncol = length(pic))
max_repetition <- 10
# Collections
collections <- list(
"iid" = generate_bipartite_collection(nr, nc,
pir, pic,
alpha, M,
model = "iid",
return_memberships = TRUE
),
"pi" = generate_bipartite_collection(nr, nc,
pir, pic,
alpha, M,
model = "pi",
return_memberships = TRUE
),
"rho" = generate_bipartite_collection(nr, nc,
pir, pic,
alpha, M,
model = "rho",
return_memberships = TRUE
),
"pirho" = generate_bipartite_collection(nr, nc,
pir, pic,
alpha, M,
model = "pirho",
return_memberships = TRUE
)
)
conditions <- expand.grid(
prop_NAs = seq(from = 0, to = 0.9, by = 0.1),
model = c("iid", "pi", "rho", "pirho"),
repetition = seq.int(max_repetition)
)
result_dataframe <- do.call("rbind", bettermc::mclapply(seq_len(nrow(conditions)), function(current) {
# Looping over conditions
prop_NAs <- conditions[current, ]$prop_NAs
model <- as.character(conditions[current, ]$model)
bipartite_collection <- collections[[model]]
# This is a list of the M incidence matrices
bipartite_collection_incidence <- lapply(seq.int(M), function(m) {
bipartite_collection[[m]]$incidence_matrix
})
# Sampling values to replace by NAs
NAs_index <- sample(
seq_len(length(bipartite_collection_incidence[[1]])),
floor(prop_NAs * length(bipartite_collection_incidence[[1]]))
)
real_val_NAs <- bipartite_collection_incidence[[1]][NAs_index]
bipartite_collection_incidence[[1]][NAs_index] <- NA
NAs_coordinates <- which(is.na(bipartite_collection_incidence[[1]]),
arr.ind = TRUE
)
Z <- lapply(seq.int(M), function(m) {
list(
bipartite_collection[[m]]$row_blockmemberships,
bipartite_collection[[m]]$col_blockmemberships
)
})
start_time <- Sys.time()
mybisbmpop <- estimate_colBiSBM(
netlist = bipartite_collection_incidence, colsbm_model = model,
nb_run = 1,
global_opts = list(
nb_cores = parallel::detectCores() - 1, verbosity = 0
), silent_parallelization = TRUE
)
stop_time <- Sys.time()
baseline_LBM <- estimate_colBiSBM(
netlist = bipartite_collection_incidence[[1]], colsbm_model = model,
nb_run = 1,
global_opts = list(
nb_cores = parallel::detectCores() - 1, verbosity = 0
), silent_parallelization = TRUE
)
# Predicted links
X_hat_LBM <- baseline_LBM$best_fit$tau[[1]][[1]] %*% baseline_LBM$best_fit$alpha %*% t(baseline_LBM$best_fit$tau[[1]][[2]])
X_hat <- mybisbmpop$best_fit$tau[[1]][[1]] %*% mybisbmpop$best_fit$alpha %*% t(mybisbmpop$best_fit$tau[[1]][[2]])
# Compute ROC and AUC
auc_LBM <- auc(c(0, 1, real_val_NAs), c(0, 1, X_hat_LBM[NAs_index]))
auc_colBiSBM <- auc(c(0, 1, real_val_NAs), c(0, 1, X_hat[NAs_index]))
# Computing ARI on the NAs
return(data.frame(
prop_NAs = prop_NAs,
model = model,
repetition = conditions[current, ]$repetition,
auc_LBM = auc_LBM,
auc_colBiSBM = auc_colBiSBM,
LBM_ari_row = aricode::ARI(
Z[[1]][[1]],
baseline_LBM$best_fit$Z[[1]][[1]]
),
LBM_ari_col = aricode::ARI(
Z[[1]][[2]],
baseline_LBM$best_fit$Z[[1]][[2]]
),
NA_net_ari_row = aricode::ARI(
Z[[1]][[1]],
mybisbmpop$best_fit$Z[[1]][[1]]
),
NA_net_ari_col = aricode::ARI(
Z[[1]][[2]],
mybisbmpop$best_fit$Z[[1]][[2]]
),
elapsed_secs = difftime(stop_time, start_time, units = "sec")
))
},
mc.cores = parallel::detectCores() - 1,
mc.progress = TRUE
))
saveRDS(
result_dataframe,
paste0(
"simulation/data/",
"NA_robustness_results-alpha_", toString(alpha),
"-reps-", max_repetition, "-", format(Sys.time(), "%d-%m-%y_%H-%M"), ".Rds"
)
)

View file

@ -0,0 +1,29 @@
library("ggplot2")
library("dplyr")
filename <- "divergence_modular_to_nested_with_3_repetitions_29-03-23_13:55:42.Rds"
filename <- paste0(getwd(), "/simulation/data/", filename)
current_data <- readRDS(filename)
current_data$results <- current_data$results %>% mutate(diff_BICL = BICL - sep_LBM_BICL)
current_data$results <- current_data$results %>%
group_by(Current_M, Network_id, divergence) %>%
summarize(mean_diff_BICL = mean(diff_BICL), sd_diff_BICL = sd(diff_BICL))
ggplot(current_data$results) +
aes(x = divergence, y = mean_diff_BICL, group = factor(Current_M), col = factor(Current_M)) +
geom_point() +
geom_line() +
geom_ribbon(
aes(
ymin = mean_diff_BICL - sd_diff_BICL,
ymax = mean_diff_BICL + sd_diff_BICL,
fill = factor(Current_M),
col = NULL
),
alpha = .25
) +
geom_hline(yintercept = 0)

View file

@ -0,0 +1,115 @@
# Sourcing all necessary files
require("sbm", quietly = T)
require("dplyr", quietly = T)
require("tictoc", quietly = T)
require("ggplot2", quietly = T)
devtools::load_all(path = "R/")
eps <- 0.05
M <- 2
nr <- 100
nc <- 250
pir1 <- c(0.2, 0, 0.8)
pir2 <- c(0.4, 0.6, 0)
pic1 <- c(0.6, 0, 0.4)
pic2 <- c(0.4, 0.6, 0)
Q <- c(length(pir1), length(pic1))
# Make a non common alpha structure
alpha <- matrix(
c( # 12 2 1
0.4, eps, eps, # 12
eps, 0.5, 0, # 2
eps, 0, 0.2 # 1
),
nrow = Q[1], ncol = Q[2], byrow = TRUE
)
bipartite_collection <- list(
generate_bipartite_network(nr, nc, pir1, pic1, alpha),
generate_bipartite_network(nr, nc, pir2, pic2, alpha)
)
# This is a list of the M incidence matrices
bipartite_collection_incidence <- lapply(seq.int(M), function(m) {
bipartite_collection[[m]]$incidence_matrix
})
Z <- lapply(seq.int(M), function(m) {
list(
bipartite_collection[[m]]$row_clustering,
bipartite_collection[[m]]$col_clustering
)
})
results <- data.frame(matrix(nrow = 0, ncol = 6))
names(results) <- c(
"repetition",
"user_func_parallel",
"exploration_parallel",
"subexploration_parallel",
"ari_sum",
"time"
)
repetition_number <- 3
conditions <- expand.grid(
seq.int(3), c(TRUE),
c(TRUE, FALSE), c(TRUE, FALSE)
)
for (i in seq_len(nrow(conditions))) {
cat(
"\nCondition ", i, "/", nrow(conditions), "\n",
"repetition:", conditions[i, 1],
" -- user_func_parallel:", conditions[i, 2],
"-- exploration_parallel:", conditions[i, 3],
"-- subexploration_parallel:", conditions[i, 4],
"\n"
)
t0 <- Sys.time()
mybisbmpop <- estimate_colBiSBM(
netlist = bipartite_collection_incidence,
colsbm_model = "pirho",
global_opts = list(
nb_cores = parallel::detectCores() - 1,
verbosity = 0,
parallelization_vector = unlist(conditions[i,c(2,3,4)])
)
)
t1 <- Sys.time()
ari_sums <- sum(sapply(
seq_along(mybisbmpop$best_fit$Z),
function(m) {
sum(
aricode::ARI(
Z[[m]][[1]],
mybisbmpop$best_fit$Z[[m]][[1]]
),
aricode::ARI(
Z[[m]][[2]],
mybisbmpop$best_fit$Z[[m]][[2]]
)
)
}
))
results[nrow(results) + 1, ] <- c(
conditions[i, 1],
conditions[i, 2],
conditions[i, 3],
conditions[i, 4],
ari_sums,
t1 - t0
)
}
saveRDS(results,
file = paste0("./simulation/data/parallel_levels_", Sys.time(), ".Rds")
)

Some files were not shown because too many files have changed in this diff Show more