Adding Rcodes for reproducible files
This commit is contained in:
parent
0ac5b5c8dd
commit
aedc1c8bcc
147 changed files with 100915 additions and 0 deletions
210
Rcodes/real_data/CoOPLBM_completion_analyze.Rmd
Normal file
210
Rcodes/real_data/CoOPLBM_completion_analyze.Rmd
Normal file
|
|
@ -0,0 +1,210 @@
|
|||
---
|
||||
title: "Netclustering analysis with the CoOPLBM completion"
|
||||
bibliography: references.bib
|
||||
suppress-bibliography: true
|
||||
output:
|
||||
html_document:
|
||||
toc: true
|
||||
theme: journal
|
||||
pdf_document:
|
||||
keep_tex: true
|
||||
---
|
||||
|
||||
```{r libraries, echo = FALSE, include=FALSE}
|
||||
devtools::load_all()
|
||||
require(aricode)
|
||||
```
|
||||
|
||||
```{r useful_functions, echo = FALSE}
|
||||
extract_unlist <- function(data) {
|
||||
readRDS(data) |>
|
||||
extract_best_bipartite_partition() |>
|
||||
unlist()
|
||||
}
|
||||
|
||||
show_groups <- function(collection_list) {
|
||||
lapply(collection_list, function(collection) {
|
||||
collection$Q
|
||||
})
|
||||
}
|
||||
|
||||
partition_BICL <- function(collection_list) {
|
||||
sum(sapply(collection_list, function(collection) {
|
||||
collection$BICL
|
||||
}))
|
||||
}
|
||||
|
||||
extract_collection_clustering_id <- function(collection_list) {
|
||||
out_dataframe <- setNames(data.frame(matrix(nrow = 0, ncol = 2)), c("netid", "collection_id"))
|
||||
for (col_idx in seq_along(collection_list)) {
|
||||
for (net_id in collection_list[[col_idx]]$net_id) {
|
||||
out_dataframe[nrow(out_dataframe) + 1, ] <- c(net_id, col_idx)
|
||||
}
|
||||
}
|
||||
out_dataframe
|
||||
}
|
||||
|
||||
extract_full_models <- function(model_collections_list) {
|
||||
return(do.call(
|
||||
"rbind",
|
||||
lapply(
|
||||
seq_along(model_collections_list),
|
||||
function(idx) {
|
||||
model_name <- names(model_collections_list)[idx]
|
||||
df <- extract_collection_clustering_id(model_collections_list[[idx]])
|
||||
return(cbind(df, model = rep(model_name, length(df$netid))))
|
||||
}
|
||||
)
|
||||
))
|
||||
}
|
||||
|
||||
reorder_match_netids <- function(dataframe, target) {
|
||||
df <- dataframe[match(target, dataframe$netid), ]
|
||||
return(df)
|
||||
}
|
||||
|
||||
extract_full_reorder <- function(model_collections_list, target) {
|
||||
unordered <- extract_full_models(model_collections_list)
|
||||
return(do.call("rbind", lapply(
|
||||
names(model_collections_list),
|
||||
function(model_name) {
|
||||
return(reorder_match_netids(unordered[which(unordered$model == model_name), ], target))
|
||||
}
|
||||
)))
|
||||
}
|
||||
```
|
||||
|
||||
```{r data_importation, echo = FALSE}
|
||||
# Uncompleted
|
||||
uncompleted_model_list <- list(
|
||||
"iid" = extract_unlist("real_data/data/dore_uncompleted_collection_clustering_nb_run_1_iid_70_networks_08-06-23-16:31:17.Rds"),
|
||||
"pi" = extract_unlist("real_data/data/dore_uncompleted_collection_clustering_nb_run_1_pi_70_networks_08-06-23-16:52:16.Rds"),
|
||||
"rho" = extract_unlist("real_data/data/dore_uncompleted_collection_clustering_nb_run_1_rho_70_networks_08-06-23-16:49:58.Rds"),
|
||||
"pirho" = extract_unlist("real_data/data/dore_uncompleted_collection_clustering_nb_run_1_pirho_70_networks_08-06-23-16:41:33.Rds")
|
||||
)
|
||||
|
||||
# Below we will need to have the netid in the same order so we choose to use the
|
||||
# uncompleted iid model order as reference
|
||||
netid_order <- extract_collection_clustering_id(uncompleted_model_list$iid)$netid
|
||||
model_order <- c("iid", "pi", "rho", "pirho")
|
||||
|
||||
uncompleted_clusterings <- extract_full_reorder(uncompleted_model_list, netid_order)
|
||||
|
||||
# 0.2 threshold
|
||||
point_2_model_list <- list(
|
||||
"iid" = extract_unlist("real_data/data/dore_point_2_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-18:40:10.Rds"),
|
||||
"pi" = extract_unlist("real_data/data/dore_point_2_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-19:22:19.Rds"),
|
||||
"rho" = extract_unlist("real_data/data/dore_point_2_completed_collection_clustering_nb_run_1_rho_70_networks_07-06-23-20:03:53.Rds"),
|
||||
"pirho" = extract_unlist("real_data/data/dore_point_2_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-21:09:12.Rds")
|
||||
)
|
||||
point_2_clusterings <- extract_full_reorder(point_2_model_list, netid_order)
|
||||
|
||||
# 0.5 threshold
|
||||
point_5_model_list <- list(
|
||||
"iid" = extract_unlist("real_data/data/dore_point_5_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-19:19:53.Rds"),
|
||||
"pi" = extract_unlist("real_data/data/dore_point_5_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-21:31:20.Rds"),
|
||||
"rho" = extract_unlist("real_data/data/dore_point_5_completed_collection_clustering_nb_run_1_rho_70_networks_07-06-23-21:03:50.Rds"),
|
||||
"pirho" = extract_unlist("real_data/data/dore_point_5_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-21:13:10.Rds")
|
||||
)
|
||||
point_5_clusterings <- extract_full_reorder(point_5_model_list, netid_order)
|
||||
|
||||
# Uniform re-sampled
|
||||
random_model_list <- list(
|
||||
"iid" = extract_unlist("real_data/data/dore_random_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-21:44:14.Rds"),
|
||||
"pi" = extract_unlist("real_data/data/dore_random_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-22:52:47.Rds"),
|
||||
"rho" = extract_unlist("real_data/data/dore_random_completed_collection_clustering_nb_run_1_rho_70_networks_08-06-23-18:16:04.Rds"),
|
||||
"pirho" = extract_unlist("real_data/data/dore_random_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-23:07:08.Rds")
|
||||
)
|
||||
random_clusterings <- extract_full_reorder(random_model_list, netid_order)
|
||||
```
|
||||
|
||||
# Context of this analysis
|
||||
|
||||
After performing a netclustering on the raw data, we will see if the detect
|
||||
structure resulting in the clustering comes from the sampling effort. To test
|
||||
this we will use the CoOPLBM model by
|
||||
@anakokDisentanglingStructureEcological2022 to complete the data.
|
||||
|
||||
The CoOPLBM model assumes that the observed incidence matrix $R$ is an
|
||||
element-wise product of an $M$ matrix following an LBM and an $N$ matrix which
|
||||
elements follow Poisson distributions independent on $M$.
|
||||
|
||||
The model gives us the $\widehat{M}$ matrix, the elements of which are:
|
||||
|
||||
$$\widehat{M_{ij}} = \mathbb{P}(M_{ij} = 1)$$
|
||||
|
||||
Note that if $R_{ij} = 1$ then $\widehat{M_{ij}} = 1$
|
||||
|
||||
- 1 if the interaction was observed
|
||||
- a probability, that there should be an interaction but it wasn't observed
|
||||
|
||||
This *completed matrix* can be used in different manners to be fed to the colSBM
|
||||
model.
|
||||
|
||||
# Threshold based completions
|
||||
With the thresholds, the infered incidence matrix obtained by
|
||||
CoOPLBM is used to generate a completed incidence matrix by the following
|
||||
procedure :
|
||||
$$X_{ij} = \begin{cases}
|
||||
1 & \text{if the value is over the threshold} \\
|
||||
0 & \text{else} \\
|
||||
\end{cases}$$
|
||||
|
||||
## 0.5 completed threshold
|
||||
```{r useful-functions, echo = FALSE, include=FALSE}
|
||||
ARI_netclustering_models <- function(
|
||||
clustering_compare,
|
||||
uncompleted_clustering = uncompleted_clustering,
|
||||
models = c("iid", "pi", "rho", "pirho")) {
|
||||
sapply(models, function(model) {
|
||||
ARI(
|
||||
uncompleted_clusterings[
|
||||
which(uncompleted_clusterings$model == model),
|
||||
]$collection_id,
|
||||
clustering_compare[
|
||||
which(clustering_compare$model == model),
|
||||
]$collection_id
|
||||
)
|
||||
})
|
||||
}
|
||||
```
|
||||
|
||||
Here, the completion threshold is set to $0.5$.
|
||||
|
||||
First we will compute an ARI on the collection id given by the raw data and the
|
||||
completed matrix.
|
||||
|
||||
```{r 0.5_ARI, echo = FALSE, results="asis"}
|
||||
knitr::kable(ARI_netclustering_models(point_5_clusterings),
|
||||
col.names = c("ARI with uncompleted data")
|
||||
)
|
||||
```
|
||||
|
||||
In the above table, one can see the network clustering obtained after applying
|
||||
CoOPLBM has not much in common with the clustering of the uncompleted data.
|
||||
|
||||
### Number of sub-collections and details of each sub-collection
|
||||
```{r 0.5_partition_numbers, echo = FALSE}
|
||||
```
|
||||
|
||||
## 0.2 completed threshold
|
||||
|
||||
The $0.2$ threshold adds a lot of interactions compared to raw matrix.
|
||||
|
||||
```{r 0.2_ARI, echo = FALSE, results="asis"}
|
||||
knitr::kable(ARI_netclustering_models(point_2_clusterings),
|
||||
col.names = c("ARI with uncompleted data")
|
||||
)
|
||||
```
|
||||
|
||||
# Sample based completions
|
||||
|
||||
The $M$ matrix is used to sample a new $X$ matrix which elements are the
|
||||
realisation of Bernoulli distributions of probability $M_{i,j}$.
|
||||
$$\mathbb{P}(X_{i,j} = 1) = M_{i,j} $$
|
||||
|
||||
```{r random_ARI, echo = FALSE, results="asis"}
|
||||
knitr::kable(ARI_netclustering_models(random_clusterings),
|
||||
col.names = c("ARI with uncompleted data")
|
||||
)
|
||||
```
|
||||
543
Rcodes/real_data/CoOPLBM_completion_analyze.html
Normal file
543
Rcodes/real_data/CoOPLBM_completion_analyze.html
Normal file
File diff suppressed because one or more lines are too long
BIN
Rcodes/real_data/CoOPLBM_completion_analyze.pdf
Normal file
BIN
Rcodes/real_data/CoOPLBM_completion_analyze.pdf
Normal file
Binary file not shown.
141
Rcodes/real_data/CoOPLBM_completion_analyze.tex
Normal file
141
Rcodes/real_data/CoOPLBM_completion_analyze.tex
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
% Options for packages loaded elsewhere
|
||||
\PassOptionsToPackage{unicode}{hyperref}
|
||||
\PassOptionsToPackage{hyphens}{url}
|
||||
%
|
||||
\documentclass[
|
||||
]{article}
|
||||
\usepackage{lmodern}
|
||||
\usepackage{amssymb,amsmath}
|
||||
\usepackage{ifxetex,ifluatex}
|
||||
\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage[utf8]{inputenc}
|
||||
\usepackage{textcomp} % provide euro and other symbols
|
||||
\else % if luatex or xetex
|
||||
\usepackage{unicode-math}
|
||||
\defaultfontfeatures{Scale=MatchLowercase}
|
||||
\defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1}
|
||||
\fi
|
||||
% Use upquote if available, for straight quotes in verbatim environments
|
||||
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
|
||||
\IfFileExists{microtype.sty}{% use microtype if available
|
||||
\usepackage[]{microtype}
|
||||
\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
|
||||
}{}
|
||||
\makeatletter
|
||||
\@ifundefined{KOMAClassName}{% if non-KOMA class
|
||||
\IfFileExists{parskip.sty}{%
|
||||
\usepackage{parskip}
|
||||
}{% else
|
||||
\setlength{\parindent}{0pt}
|
||||
\setlength{\parskip}{6pt plus 2pt minus 1pt}}
|
||||
}{% if KOMA class
|
||||
\KOMAoptions{parskip=half}}
|
||||
\makeatother
|
||||
\usepackage{xcolor}
|
||||
\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available
|
||||
\IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}}
|
||||
\hypersetup{
|
||||
pdftitle={Netclustering analysis with the CoOPLBM completion},
|
||||
hidelinks,
|
||||
pdfcreator={LaTeX via pandoc}}
|
||||
\urlstyle{same} % disable monospaced font for URLs
|
||||
\usepackage[margin=1in]{geometry}
|
||||
\usepackage{graphicx}
|
||||
\makeatletter
|
||||
\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
|
||||
\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
|
||||
\makeatother
|
||||
% Scale images if necessary, so that they will not overflow the page
|
||||
% margins by default, and it is still possible to overwrite the defaults
|
||||
% using explicit options in \includegraphics[width, height, ...]{}
|
||||
\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
|
||||
% Set default figure placement to htbp
|
||||
\makeatletter
|
||||
\def\fps@figure{htbp}
|
||||
\makeatother
|
||||
\setlength{\emergencystretch}{3em} % prevent overfull lines
|
||||
\providecommand{\tightlist}{%
|
||||
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
|
||||
\setcounter{secnumdepth}{-\maxdimen} % remove section numbering
|
||||
\newlength{\cslhangindent}
|
||||
\setlength{\cslhangindent}{1.5em}
|
||||
\newenvironment{cslreferences}%
|
||||
{\setlength{\parindent}{0pt}%
|
||||
\everypar{\setlength{\hangindent}{\cslhangindent}}\ignorespaces}%
|
||||
{\par}
|
||||
|
||||
\title{Netclustering analysis with the CoOPLBM completion}
|
||||
\author{}
|
||||
\date{\vspace{-2.5em}}
|
||||
|
||||
\begin{document}
|
||||
\maketitle
|
||||
|
||||
\hypertarget{context-of-this-analysis}{%
|
||||
\section{Context of this analysis}\label{context-of-this-analysis}}
|
||||
|
||||
After performing a netclustering on the raw data, we will see if the
|
||||
detect structure resulting in the clustering comes from the sampling
|
||||
effort. To test this we will use the CoOPLBM model by Anakok et al.
|
||||
(2022) to complete the data.
|
||||
|
||||
The CoOPLBM model assumes that the observed incidence matrix \(R\) is an
|
||||
element-wise product of an \(M\) matrix following an LBM and an \(N\)
|
||||
matrix which elements follow Poisson distributions independent on \(M\).
|
||||
|
||||
The model gives us the \(\hat{M}\) matrix, which elements are:
|
||||
|
||||
\begin{itemize}
|
||||
\tightlist
|
||||
\item
|
||||
1 if the interaction was observed
|
||||
\item
|
||||
a probability, that there should be an interaction but it wasn't
|
||||
observed
|
||||
\end{itemize}
|
||||
|
||||
This \emph{completed matrix} can be used in different manners to be fed
|
||||
to the colSBM model.
|
||||
|
||||
\hypertarget{threshold-based-completions}{%
|
||||
\section{Threshold based
|
||||
completions}\label{threshold-based-completions}}
|
||||
|
||||
With the thresholds, the infered incidence matrix obtained by CoOPLBM is
|
||||
used to generate a completed incidence matrix by the following procedure
|
||||
: \[X_{ij} = \begin{cases}
|
||||
1 & \text{if the value is over the threshold} \\
|
||||
0 & \text{else} \\
|
||||
\end{cases}\]
|
||||
|
||||
\hypertarget{completed-threshold}{%
|
||||
\subsection{0.5 completed threshold}\label{completed-threshold}}
|
||||
|
||||
Here, the completion threshold is set to \(0.5\).
|
||||
|
||||
\hypertarget{ari-of-networks-clustering-0.5-threshold-vs-raw-data}{%
|
||||
\subsubsection{ARI of networks clustering: 0.5 threshold vs raw
|
||||
data}\label{ari-of-networks-clustering-0.5-threshold-vs-raw-data}}
|
||||
|
||||
\hypertarget{sample-based-completions}{%
|
||||
\section{Sample based completions}\label{sample-based-completions}}
|
||||
|
||||
The \(M\) matrix is used to sample a new \(X\) matrix which elements are
|
||||
the realisation of Bernoulli distributions of probability \(M_{i,j}\).
|
||||
\[\mathbb{P}(X_{i,j} = 1) = M_{i,j} \]
|
||||
|
||||
\hypertarget{references}{%
|
||||
\section*{References}\label{references}}
|
||||
\addcontentsline{toc}{section}{References}
|
||||
|
||||
\hypertarget{refs}{}
|
||||
\begin{cslreferences}
|
||||
\leavevmode\hypertarget{ref-anakokDisentanglingStructureEcological2022}{}%
|
||||
Anakok, Emre, Pierre Barbillon, Colin Fontaine, and Elisa Thebault.
|
||||
2022. ``Disentangling the Structure of Ecological Bipartite Networks
|
||||
from Observation Processes.'' arXiv.
|
||||
\url{http://arxiv.org/abs/2211.16364}.
|
||||
\end{cslreferences}
|
||||
|
||||
\end{document}
|
||||
BIN
Rcodes/real_data/data/Data.rds
Executable file
BIN
Rcodes/real_data/data/Data.rds
Executable file
Binary file not shown.
36188
Rcodes/real_data/data/DataTotherbi_updateJune2023.csv
Normal file
36188
Rcodes/real_data/data/DataTotherbi_updateJune2023.csv
Normal file
File diff suppressed because it is too large
Load diff
BIN
Rcodes/real_data/data/completed0.2.rds
Executable file
BIN
Rcodes/real_data/data/completed0.2.rds
Executable file
Binary file not shown.
BIN
Rcodes/real_data/data/completed0.5.rds
Executable file
BIN
Rcodes/real_data/data/completed0.5.rds
Executable file
Binary file not shown.
BIN
Rcodes/real_data/data/completedrandom.rds
Executable file
BIN
Rcodes/real_data/data/completedrandom.rds
Executable file
Binary file not shown.
BIN
Rcodes/real_data/data/dore-matrices.Rds
Normal file
BIN
Rcodes/real_data/data/dore-matrices.Rds
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
55277
Rcodes/real_data/data/interaction-data.txt
Normal file
55277
Rcodes/real_data/data/interaction-data.txt
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,162 @@
|
|||
publi;web;latitude;longitude;locationtot;feedingguild1;feedingguild2;samplingyear
|
||||
adaime;adaime2017_A;2.5;-51.5;terra firme forest, floodable areas and savannas, Cal<61>oene municipality, Amapa State, Brazil;fruit eater;NoOrthoptera;2013
|
||||
adaime;adaime2017_B;3.5;-51.8;terra firme forest, floodable areas and savannas, Oiapoque municipality, Amapa State, Brazil;fruit eater;NoOrthoptera;2013
|
||||
Aldryhim;Aldryhim1985;24;-46; Saudi Arabia, Saudi Arabia;sap sucker;NoOrthoptera;1995
|
||||
basset;basset1996;-7.4;146.73;grasslands and forest patches, dominated by secondary forests, Mt Kaindi, Wau, Papua New Guinea;leaf chewing;NoOrthoptera;1993
|
||||
bergamini;bergamini2016;-17.6;-43.6;grasslands, Brazilian Cerrado savannas, Brazil;flower eater;NoOrthoptera;2003
|
||||
bluthgen;bluthgen2006;4.97;117.8;mature lowland evergreen dipterocarp forest , Danun Valley conservation area, Danum Valley, Sabah, Malaysia;leaf chewing;NoOrthoptera;2004
|
||||
bodner;bodner2009;-3.97;-79.98;pristine montane forest, Reserva biologica San Francisco, Zamora-Chinchipe, Ecuador ;leaf chewing;NoOrthoptera;2006
|
||||
Brehm;Brehm2003;-3.97;-79.08;primary disturbed montane rainforest and early successional stages after human disturbances, Estacion Cientifica San Francisco, Zamora Chinchipe, South Ecuador;mixed;NoOrthoptera;2000
|
||||
Brown;Brown2019;14;99.7;lowland seasonal evergreen rainforest, Khao Chong Botanical Garden, Thailand;mixed;NoOrthoptera;2015
|
||||
coley;coley2006;9;-80;moist tropical lowland forest, Barro Colorado Island, Panama;leaf chewing;NoOrthoptera;1999
|
||||
cuevas-reyes;cuevas-reyes;19.49;-104.99;tropical deciduous forest and patches of tropical riparian forest, Chamela-Cuixmala Biosphere reserve, Pacific coast of Jalisco, Mexico;gall maker;NoOrthoptera;2001
|
||||
dyer&gentry;dyer&gentry2002;10.43;-83.98;wetland, La Selva biological station, Costa Rica;leaf chewing;NoOrthoptera;2002
|
||||
garcia&robledo;garcia&robledo2013;10.43;-83.98;aseasonal tropical wet forest, La Sevla Biological Station, Costa Rica;leaf chewing;NoOrthoptera;2011
|
||||
Hackett2019;Hackett2019_HH;50.72;-1.75;ocean-adjacent peninsula consisting of a mosaic of habitats, Hengistbury Head, UK;mixed;NoOrthoptera;2013
|
||||
Hackett2019;Hackett2019_TP;-46.59;169.43;ocean-adjacent peninsula consisting of a mosaic of habitats, Tautuku Peninsula, New Zealand;mixed;NoOrthoptera;2015
|
||||
heleno;heleno2009;37.78;-25.22;native vegetation was cleared for pastures, replaced by production forest or taken over by weeds, Serra da Tronqueira, Sao Miguel, Azores archipelago, Portugal;mixed;NoOrthoptera;2006
|
||||
hemidi;hemidi2013;34.8;5.73; Biskra, Alg<6C>rie;sap sucker;NoOrthoptera;2011
|
||||
henneman&memmott;henneman&memmott1;22.09;-159.56;isolated from agricultural areas, Alakai Swamp, island of Kauai, Hawaii;leaf chewing;NoOrthoptera;2000
|
||||
henneman&memmott;henneman&memmott2;22.09;-159.56;wetland, Alakai Swamp, island of Kauai, Hawaii;leaf chewing;NoOrthoptera;2000
|
||||
Ibanez;Ibanez2013;45.03;6.4;subalpine grasslands, Vilar d'Ar<41>ne, central french alps;leaf chewing;Orthoptera;2011
|
||||
Idechil;Idechil2007;7.5;134.5; Republic of Palau, islands of Palau;sap sucker;NoOrthoptera;2004
|
||||
janzen;janzen1980;10.8;-85.7;deciduous forest with moister habitats , Santa Rosa National Park, Costa Rica;frugivore_seed predator;NoOrthoptera;1980
|
||||
janzen;janzen2003;10.83;-85.6;rainforest, Area de conservation Guanacaste, Costa Rica;leaf chewing;NoOrthoptera;2003
|
||||
joern;joern_altuda;30.4;-103.6;mountainous area with arid grassland plant species, Altuda and Marathon, near Alpine, Texas;leaf chewing;Orthoptera;1975
|
||||
joern;joern_marathon;30.4;-103.6;arid grassland, Alpine, Texas, USA;leaf chewing;Orthoptera;1975
|
||||
Joern1985;Joern1985;41.578544;-101.707547;Arapaho Prairie, upland Sand Hills grassland, Arthur County, Nebraska, USA;leaf chewing;Orthoptera;1985
|
||||
kim&choi;kim&choi2021_A;35.41;127.48;temperate forest, mixed deciduous forest, Sangseonam hornbeam tree forest, South Korea;leaf chewing;NoOrthoptera;2016
|
||||
kim&choi;kim&choi2021_B;35.37;127.57;temperate forest, mixed deciduous forest, Mt Jirisan, banseon oak tree forest, South Korea;leaf chewing;NoOrthoptera;2016
|
||||
Knuff;Knuff2019;47.998955;8.205064;managed forests, southern Black Forest, southwestern Germany;gall maker;NoOrthoptera;2017
|
||||
kollars;kollars2013;48.32;18.1;alluvial soil with high content of clay fraction, Botanical garden of Slovak University of Agriculture, Nitra, Solavakia;mixed;NoOrthoptera;2012
|
||||
Konig2022;Konig2022;47.592457;12.940233;open grassland sites on calcareous bedrock, National Park Berchtesgaden and Lower Franconia, Bavaria, Germany;leaf chewing;Orthoptera;2019
|
||||
lewis;lewis2002;17.07;-88.69;moist tropical forest, Chiquibul Forest reserve, Cayo district, Belize, Central America;leaf miner;NoOrthoptera;1998
|
||||
Macfadyen;Macfadyen_A1;51.4;-2.69;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_A10;51;-2.49;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_A2;51.55;-2.49;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_A3;51.51;-2.42;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_A4;51.52;-2.49;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_A5;51.51;-2.5;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_A6;51.52;-2.48;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_A7;51.32;-2.35;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_A8;51.36;-2.47;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_A9;51.32;-2.34;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B1;5.421563;-2.6801987;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B2;51.291623;-2.6028854;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B3;51.29728;-2.4066504;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B4;51.126799;-2.3271997;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B5;51.069394;-2.4322512;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B6;51.614902;-2.3436984;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B7;51.694362;-2.1084028;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B8;51.638776;-2.1769368;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B9;51.685258;-2.0148078;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B10;51.142195;-2.2890993;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
martins;martins2020;-17.6;-43.6;grasslands, Brazilian Cerrado , Brazil;flower eater;NoOrthoptera;2005
|
||||
Masetti;Masetti2004_A;44.16;12.22;near a 5 yr hedgerow, potato, wheat, alfalfa and wine grapes, Centro Ricerche produzioni Vegetali, Bologna Province, Italia;leaf miner;NoOrthoptera;1999
|
||||
Masetti;Masetti2004_B;45;12;near a 4 yr hedgerow, corn, wheat and beets, Bologna Province, Italia, Bologna Province, Italia;leaf miner;NoOrthoptera;1999
|
||||
Masetti;Masetti2004_C;45;12;near a 6 yr hederow and a ditch, Bologna Province, Italia, Bologna Province, Italia;leaf miner;NoOrthoptera;1999
|
||||
massa;massa2001_A;37.6;14; Sicily, Italy;leaf miner;NoOrthoptera;2000
|
||||
massa;massa2001_B;30;35; Al Bahhath, Aqaba and Dana Village, Jordan;leaf miner;NoOrthoptera;1999
|
||||
memmott;memmott1994;10.83;-85.7;tropical dry forest, Santa Rosa National Park, Guanacaste, Costa Rica;leaf miner;NoOrthoptera;1990
|
||||
muller;muller1999;51.4;-0.64;heavily grazed field, Silwood Park, Berkshire, England;sap sucker;NoOrthoptera;1995
|
||||
nakagawa;nakagawa_1;4.33;113.83;humult and udult soils, Lambir Hills National Park, Sarawak, Malaysia;seed predator;NoOrthoptera;1998
|
||||
nakagawa;nakagawa_2;4.33;113.83;tropical lowland forest, Lambir Hills National Park, Malaysia;seed predator;NoOrthoptera;1998
|
||||
novotny1;novotny2005;-5.23;145.4;primary and secondary forests, Madang province, Papua New Guinea;fruit eater;NoOrthoptera;2001
|
||||
novotny2;novotny2012;-5.23;145.4;tropical lowland forest, Madang province, Papua New Guinea;mixed;NoOrthoptera;2008
|
||||
pearson&altermatt;pearson&altermatt2013;48.54;9.04; Baden W<>rttemberg, Germany;leaf chewing;NoOrthoptera;2005
|
||||
Peralta;Peralta2017;-41.49228;173.022973;native Nothofagaceae forest and exotic pine plantations, native and exotic plantation forests, New Zealand;leaf chewing;NoOrthoptera;2011
|
||||
Pitteloud;Pitteloud2020_B1;46.25788;7.02235;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_B2;46.26914;7.03241;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_B3;46.27424;7.04833;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_B4;46.28629;7.0957;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_B5.1;46.28567;7.12451;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_B5.2;46.29149;7.11251;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_B6;46.26865;7.10755;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_B7;46.27683;7.15518;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_B8;46.26852;7.16232;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_C1;46.86999;9.5166;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_C2;46.87022;9.50919;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_C3;46.8743;9.5083;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_C4;46.87021;9.48978;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_C5;46.87841;9.49396;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_C6;46.88867;9.48989;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_C7;46.88946;9.48001;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_C8;46.89054;9.47624;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_F1;46.4732;8.81948;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_F2;46.4887;8.79271;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_F3;46.4951;8.78085;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_F4;46.5028;8.7816;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_F5;46.50577;8.7947;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_F6;46.50891;8.77677;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_F7;46.51244;8.77881;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_F8;46.51553;8.78595;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_G1;46.63493;7.90025;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_G2;46.63957;7.9796;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_G3;46.64264;7.97271;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_G4;46.64737;7.97086;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_G5;46.64949;7.9509;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_G6;46.65319;7.94267;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_G7;46.65756;7.99056;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_G8;46.66035;7.9805;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_M1;46.12583;7.08096;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_M2;46.06652;7.14868;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_M3;46.05299;7.15534;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_M4;46.01957;7.1621;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_M5;46.02743;7.17717;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_M6;46.03024;7.17648;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_M7;46.03573;7.17791;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_M8;46.03712;7.18249;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_S1;46.31328;7.58466;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_S2;46.32012;7.57929;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_S3;46.3251;7.5532;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_S4;46.33103;7.56003;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_S5;46.3353;7.53348;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_S6;46.34787;7.5386;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_S7;46.34687;7.52317;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_S8;46.35071;7.52571;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
pocock;pocock2012_aphid;51.31;-2.32;mixed farm: 50% arable, 50% grass pasture or ley, Norwood Farm, Somerset, UK;mixed;NoOrthoptera;2008
|
||||
prado;prado2004;-19;-43;high plant diversity and endemism, Campos rupestres, Brazil;flower eater;NoOrthoptera;1996
|
||||
Saavedra;Saavedra2017_initial_A;19.575694;-105.035162;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
|
||||
Saavedra;Saavedra2017_initial_B;19.572095;-105.054989;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
|
||||
Saavedra;Saavedra2017_initial_C;19.416939;-104.895751;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
|
||||
Saavedra;Saavedra2017_middle_A;19.610383;-105.027566;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
|
||||
Saavedra;Saavedra2017_middle_B;19.578039;-105.040484;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
|
||||
Saavedra;Saavedra2017_middle_C;19.509043;-104.9256;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
|
||||
Saavedra;Saavedra2017_late_A;19.603551;-105.090781;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
|
||||
Saavedra;Saavedra2017_late_B;19.596921;-105.033532;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
|
||||
Saavedra;Saavedra2017_late_C;19.498545;-104.931694;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
|
||||
Sanjaya2016;Sanjaya2016;-6.862444;107.595211;Botanical Garden UPI, Bandung, Indonesia;fruit eater;NoOrthoptera;2012
|
||||
Santos;Santos2006;-29.8;-51.77; Taquari, Rio Grande do Sul, Bazil;leaf miner;NoOrthoptera;2004
|
||||
SantosdeAraujo2019;SantosdeAraujo2019;48.318611;18.081944;Nitra City Park, Nitra, Slovakia;gall maker;NoOrthoptera;2008
|
||||
seifert;seifert2020_A;42.71;141.6;temperate lowland forest, Tomakomai, Hokkaido, Japan;leaf chewing;NoOrthoptera;2015
|
||||
seifert;seifert2020_B;38.9;-78.41;temperate lowland forest, Toms Brook, Virginia, USA;leaf chewing;NoOrthoptera;2017
|
||||
seifert;seifert2020_C;48.7;16.95;temperate lowland forest, Lanzhot , Czech Republic;leaf chewing;NoOrthoptera;2015
|
||||
shimada;shimada2020;35.039324;135.187705;paddy fields and grasslands, agricultural landscape, Japan;sap sucker;NoOrthoptera;2019
|
||||
Shinohara;Shinohara2019_Abandoned_1;35.495752;135.893636;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Abandoned_2;35.507486;135.909609;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Abandoned_3;35.489386;135.897067;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Abandoned_4;35.565453;135.908289;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Extensively_managed_1;35.498043;135.895563;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Extensively_managed_2;35.50826;135.908896;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Extensively_managed_3;35.489949;135.89785;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Extensively_managed_4;35.561526;135.905618;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Intensively_managed_1;35.498448;135.898817;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Intensively_managed_2;35.509203;135.907169;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Intensively_managed_3;35.489111;135.89987;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Intensively_managed_4;35.563264;135.907279;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Silveira2021;Silveira2021_nonpreserved;-16.788637;-43.880394;Neotropical savanna areas in anthropized landscapes, northern Minas Gerais State, Brazil;mixed;mixed;2019
|
||||
Silveira2021;Silveira2021_preserved;-17.358135;-44.259766;Neotropical savanna areas in preserved landscapes, northern Minas Gerais State, Brazil;mixed;mixed;2019
|
||||
stary;stary2008;49.51;14.93;managed forests, Techobuz, Czech;sap sucker;NoOrthoptera;2007
|
||||
sugiura;sugiura2020;26.67;142.98;many endemic organisms: vascular plants and insects, Ogasawara Village, Tokyo metropolitan, Japan;mixed;NoOrthoptera;2008
|
||||
Sugiura2010;Sugiura2010;27.073072;142.217277;subtropical Ogasawara islands, Japan;mixed;NoOrthoptera;2008
|
||||
Szpeiner2008;Szpeiner2008;-31.403573;-64.144999;ornemental plants, Cordoba city, Argentina;sap sucker;NoOrthoptera;2002
|
||||
Tahar2015;Tahar2015;34.834667;5.738862;Biskra province, Algeria;sap sucker;NoOrthoptera;2014
|
||||
tavakilian;tavakilian1997;5.41;-52.97;swamp forests to mixed forests on well-drained soil, Sinnamary River Basin, French Guiana;deadwood eater;NoOrthoptera;1993
|
||||
tcharntke;tcharntke;49;8.32; Karlsruhe, southwest Germany;leaf miner;NoOrthoptera;1989
|
||||
Todorov2014;Todorov2014;42.157088;24.82175;Experimentally Field of Agriculture University, Agro-ecosystems, Plovdiv Region, Bulgaria;sap sucker;NoOrthoptera;2006
|
||||
ueckert;ueckert;40.17;-103.21;mixed-grass prairie, 27 km north of Akron, Washington county, Colorado;leaf chewing;Orthoptera;1968
|
||||
Volf2017;Volf2017_Tomakomai;42.716667;141.6;Tomakomai forest, Japan;mixed;NoOrthoptera;2014
|
||||
Volf2017;Volf2017_Lanzhot;48.8;17.083333;Lanzhot forest, Czech Republic;mixed;NoOrthoptera;2014
|
||||
Volf2017;Volf2017_Mikulcice;48.683333;16.933333;Mikulcice forest, Czech Republic;mixed;NoOrthoptera;2014
|
||||
Xi2020;Xi2020;32.763004;102.518629;alpine meadow in the eastern Qinghai-Tibetan Plateau, Hongyuan County, Sichuan Province, China;seed predator;NoOrthoptera;2016
|
||||
Zhu;Zhu2018;23.45;111.883333;subtropical forest, subtropical forest located within Heishiding Nature Reserve, south China, China;leaf chewing;NoOrthoptera;2014
|
||||
|
BIN
Rcodes/real_data/data/supinfo.xlsx
Normal file
BIN
Rcodes/real_data/data/supinfo.xlsx
Normal file
Binary file not shown.
115
Rcodes/real_data/netclustering_CoOPLBM_completed.R
Normal file
115
Rcodes/real_data/netclustering_CoOPLBM_completed.R
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
require("ggplot2")
|
||||
require("tictoc")
|
||||
require("tidyverse")
|
||||
|
||||
devtools::load_all("R/")
|
||||
|
||||
# Importation of data
|
||||
interaction_data <- read.table(file = "real_data/data/interaction-data.txt", sep = "\t", header = TRUE)
|
||||
|
||||
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
|
||||
names_aggreg_networks <- sapply(
|
||||
seq_ids_network_aggreg,
|
||||
function(id) {
|
||||
paste0(
|
||||
unique(interaction_data[which(interaction_data$id_network_aggreg == id), ]$web),
|
||||
collapse = "+"
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
# Full data
|
||||
if (!file.exists("real_data/data/dore-matrices.Rds")) {
|
||||
# Computation of incidence matrices
|
||||
incidence_matrices <- lapply(
|
||||
seq_ids_network_aggreg,
|
||||
function(m) {
|
||||
current_interaction_data <- interaction_data[which(interaction_data$id_network_aggreg == m), ] %>%
|
||||
mutate(
|
||||
plantaggreg = paste(plantorder,
|
||||
plantfamily, plantgenus, plantspecies,
|
||||
sep = " "
|
||||
),
|
||||
insectaggreg = paste(insectorder,
|
||||
insectfamily, insectgenus, insectspecies,
|
||||
sep = " "
|
||||
)
|
||||
)
|
||||
current_interaction_data <- table(current_interaction_data$insectaggreg, current_interaction_data$plantaggreg)
|
||||
|
||||
current_incidence_matrix <- matrix(current_interaction_data,
|
||||
ncol = ncol(current_interaction_data), dimnames = dimnames(current_interaction_data)
|
||||
)
|
||||
|
||||
current_incidence_matrix[which(current_incidence_matrix > 0)] <- 1
|
||||
return(current_incidence_matrix)
|
||||
}
|
||||
)
|
||||
|
||||
names(incidence_matrices) <- names_aggreg_networks
|
||||
|
||||
saveRDS(incidence_matrices, file = "real_data/data/dore-matrices.Rds")
|
||||
} else {
|
||||
incidence_matrices <- readRDS(file = "real_data/data/dore-matrices.Rds")
|
||||
}
|
||||
# Emre completed Data
|
||||
completed_networks_ids <- as.numeric(names(readRDS("real_data/data/Data.rds")))
|
||||
completed_networks_names <- sapply(
|
||||
completed_networks_ids,
|
||||
function(id) {
|
||||
paste0(
|
||||
unique(interaction_data[which(interaction_data$id_network_aggreg == id), ]$web),
|
||||
collapse = "+"
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
uncompleted <- incidence_matrices[match(completed_networks_names, names(incidence_matrices))]
|
||||
point_2_completed <- setNames(readRDS("real_data/data/completed0.2.rds"), completed_networks_names)
|
||||
point_5_completed <- setNames(readRDS("real_data/data/completed0.5.rds"), completed_networks_names)
|
||||
random_completed <- setNames(readRDS("real_data/data/completedrandom.rds"), completed_networks_names)
|
||||
|
||||
if (!exists("arg")) {
|
||||
arg <- commandArgs(trailingOnly = TRUE)
|
||||
}
|
||||
|
||||
if (identical(arg, character(0))) {
|
||||
number_of_net <- length(uncompleted)
|
||||
model <- "iid"
|
||||
nb_run <- 1
|
||||
source_data <- eval(as.name("uncompleted"))
|
||||
data_name <- "uncompleted"
|
||||
} else {
|
||||
number_of_net <- as.numeric(arg[1])
|
||||
model <- arg[2]
|
||||
nb_run <- as.numeric(arg[3])
|
||||
source_data <- eval(as.name(arg[4]))
|
||||
data_name <- arg[4]
|
||||
}
|
||||
print(number_of_net)
|
||||
print(model)
|
||||
print(nb_run)
|
||||
print(data_name)
|
||||
tic()
|
||||
list_collection <- clusterize_bipartite_networks(
|
||||
netlist = source_data[1:number_of_net],
|
||||
net_id = names(source_data)[1:number_of_net],
|
||||
colsbm_model = model,
|
||||
nb_run = nb_run,
|
||||
global_opts = list(
|
||||
nb_cores = parallel::detectCores() - 1, verbosity = 4,
|
||||
plot_details = 0
|
||||
),
|
||||
silent_parallelization = FALSE
|
||||
)
|
||||
toc()
|
||||
|
||||
saveRDS(list_collection, file = paste0(
|
||||
"real_data/data/",
|
||||
"dore_",
|
||||
data_name,
|
||||
"_collection_clustering_nb_run_", nb_run, "_", model, "_",
|
||||
number_of_net, "_networks_",
|
||||
format(Sys.time(), "%d-%m-%y-%X"),
|
||||
".Rds"
|
||||
))
|
||||
90
Rcodes/real_data/netclustering_check_real_data.R
Normal file
90
Rcodes/real_data/netclustering_check_real_data.R
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
require("ggplot2")
|
||||
require("tictoc")
|
||||
require("tidyverse")
|
||||
|
||||
devtools::load_all("R/")
|
||||
|
||||
# Importation of data
|
||||
if (!file.exists("real_data/data/dore-matrices.Rds")) {
|
||||
interaction_data <- read.table(file = "real_data/data/interaction-data.txt", sep = "\t", header = TRUE)
|
||||
|
||||
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
|
||||
names_aggreg_networks <- sapply(
|
||||
seq_ids_network_aggreg,
|
||||
function(id) {
|
||||
paste0(
|
||||
unique(interaction_data[which(interaction_data$id_network_aggreg == id), ]$web),
|
||||
collapse = "+"
|
||||
)
|
||||
}
|
||||
)
|
||||
# Computation of incidence matrices
|
||||
incidence_matrices <- lapply(
|
||||
seq_ids_network_aggreg,
|
||||
function(m) {
|
||||
current_interaction_data <- interaction_data[which(interaction_data$id_network_aggreg == m), ] %>%
|
||||
mutate(
|
||||
plantaggreg = paste(plantorder,
|
||||
plantfamily, plantgenus, plantspecies,
|
||||
sep = "-"
|
||||
),
|
||||
insectaggreg = paste(insectorder,
|
||||
insectfamily, insectgenus, insectspecies,
|
||||
sep = "-"
|
||||
)
|
||||
)
|
||||
current_interaction_data <- table(current_interaction_data$plantaggreg, current_interaction_data$insectaggreg)
|
||||
|
||||
current_incidence_matrix <- matrix(current_interaction_data,
|
||||
ncol = ncol(current_interaction_data), dimnames = dimnames(current_interaction_data)
|
||||
)
|
||||
|
||||
current_incidence_matrix[which(current_incidence_matrix > 0)] <- 1
|
||||
return(current_incidence_matrix)
|
||||
}
|
||||
)
|
||||
|
||||
names(incidence_matrices) <- names_aggreg_networks
|
||||
|
||||
saveRDS(incidence_matrices, file = "real_data/data/dore-matrices.Rds")
|
||||
} else {
|
||||
incidence_matrices <- readRDS(file = "real_data/data/dore-matrices.Rds")
|
||||
}
|
||||
|
||||
if (!exists("arg")) {
|
||||
arg <- commandArgs(trailingOnly = TRUE)
|
||||
}
|
||||
|
||||
if (identical(arg, character(0))) {
|
||||
number_of_net <- length(incidence_matrices)
|
||||
model <- "pirho"
|
||||
nb_run <- 3
|
||||
} else {
|
||||
number_of_net <- as.numeric(arg[1])
|
||||
model <- arg[2]
|
||||
nb_run <- as.numeric(arg[3])
|
||||
}
|
||||
print(number_of_net)
|
||||
print(model)
|
||||
print(nb_run)
|
||||
tic()
|
||||
list_collection <- clusterize_bipartite_networks(
|
||||
netlist = incidence_matrices[1:number_of_net],
|
||||
net_id = names(incidence_matrices)[1:number_of_net],
|
||||
colsbm_model = model,
|
||||
nb_run = nb_run,
|
||||
global_opts = list(
|
||||
nb_cores = parallel::detectCores() - 1, verbosity = 4,
|
||||
plot_details = 0
|
||||
),
|
||||
silent_parallelization = TRUE
|
||||
)
|
||||
toc()
|
||||
|
||||
saveRDS(list_collection, file = paste0(
|
||||
"real_data/data/",
|
||||
"dore_collection_clustering_nb_run", nb_run,"_",model,"_",
|
||||
number_of_net, "networks_",
|
||||
format(Sys.time(), "%d-%m-%y-%X"),
|
||||
".Rds"
|
||||
))
|
||||
76
Rcodes/real_data/netclustering_dore_analyze.R
Normal file
76
Rcodes/real_data/netclustering_dore_analyze.R
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
require("ggplot2")
|
||||
devtools::load_all("R/")
|
||||
|
||||
## Preparing data for analysis
|
||||
interaction_data <- read.table(file = "real_data/data/interaction-data.txt", sep = "\t", header = TRUE)
|
||||
|
||||
insect_orders <- unique(interaction_data$insectorder)
|
||||
plant_family <- unique(interaction_data$plantfamily)
|
||||
|
||||
# All results
|
||||
iid_clustering <- readRDS("real_data/data/dore_collection_clustering_nb_run1_iid_123networks_24-05-23-21:40:42.Rds")
|
||||
iid_best_partition <- extract_best_bipartite_partition(iid_clustering)
|
||||
iid_unlist <- unlist(iid_best_partition)
|
||||
|
||||
|
||||
rho_clustering <- readRDS("real_data/data/dore_collection_clustering_nb_run1_rho_123networks_25-05-23-13:58:30.Rds")
|
||||
rho_best_partition <- extract_best_bipartite_partition(rho_clustering)
|
||||
rho_unlist <- unlist(rho_best_partition)
|
||||
|
||||
pi_clustering <- readRDS("real_data/data/dore_collection_clustering_nb_run1_pi_123networks_25-05-23-17:31:25.Rds")
|
||||
pi_best_partition <- extract_best_bipartite_partition(pi_clustering)
|
||||
pi_unlist <- unlist(pi_best_partition)
|
||||
|
||||
pirho_clustering <- readRDS("real_data/data/dore_collection_clustering_nb_run1_pirho_123networks_26-05-23-19:22:55.Rds")
|
||||
pirho_best_partition <- extract_best_bipartite_partition(pirho_clustering)
|
||||
pirho_unlist <- unlist(pirho_best_partition)
|
||||
|
||||
### Matching taxonomy
|
||||
taxonomy_in_clusters <- function(unlisted_model) {
|
||||
lapply(seq_len(length(unlisted_model)), function(col_idx) {
|
||||
# Per collection
|
||||
# Empty init
|
||||
insect_count <- t(sapply(insect_orders, function(order) {
|
||||
out_count <- rep(0, unlisted_model[[col_idx]]$Q[2])
|
||||
out_count
|
||||
}))
|
||||
|
||||
plant_count <- t(sapply(plant_family, function(order) {
|
||||
out_count <- rep(0, unlisted_model[[col_idx]]$Q[1])
|
||||
out_count
|
||||
}))
|
||||
|
||||
for (m in seq.int(unlisted_model[[col_idx]]$M)) {
|
||||
#### Insect
|
||||
insect_names <- names(unlisted_model[[col_idx]]$Z[[1]][[2]])
|
||||
|
||||
insect_count <- insect_count + t(sapply(insect_orders, function(order) {
|
||||
out_count <- rep(0, unlisted_model[[col_idx]]$Q[2])
|
||||
names(out_count) <- seq.int(unlisted_model[[col_idx]]$Q[2])
|
||||
insect_count <- table(unlisted_model[[col_idx]]$Z[[m]][[2]][grep(order, insect_names)])
|
||||
out_count[names(insect_count)] <- insect_count
|
||||
out_count
|
||||
}))
|
||||
#### Plants
|
||||
plant_names <- names(unlisted_model[[col_idx]]$Z[[1]][[1]])
|
||||
|
||||
plant_count <- t(sapply(plant_family, function(order) {
|
||||
out_count <- rep(0, unlisted_model[[col_idx]]$Q[1])
|
||||
names(out_count) <- seq.int(unlisted_model[[col_idx]]$Q[1])
|
||||
plant_count <- table(unlisted_model[[col_idx]]$Z[[m]][[1]][grep(order, plant_names)])
|
||||
out_count[names(plant_count)] <- plant_count
|
||||
out_count
|
||||
}))
|
||||
}
|
||||
return(list(insects = insect_count, plants = plant_count))
|
||||
})
|
||||
}
|
||||
|
||||
taxonomy_remove_empty <- function(taxonomy_collections_list) {
|
||||
lapply(taxonomy_collections_list, function(collection) {
|
||||
list(
|
||||
insects = collection$insects[which(rowSums(collection$insects != 0) > 0), ],
|
||||
plants = collection$plants[which(rowSums(collection$plants != 0) > 0), ]
|
||||
)
|
||||
})
|
||||
}
|
||||
527
Rcodes/real_data/presentation_dore.Rmd
Normal file
527
Rcodes/real_data/presentation_dore.Rmd
Normal file
|
|
@ -0,0 +1,527 @@
|
|||
---
|
||||
title: "Présentation de l'application de colSBM sur Doré et al. 2020"
|
||||
output:
|
||||
html_document:
|
||||
toc: true
|
||||
theme: united
|
||||
pdf_document:
|
||||
keep_tex: true
|
||||
---
|
||||
|
||||
```{r, setup, include=FALSE, warning=FALSE}
|
||||
knitr::opts_chunk$set(echo = FALSE)
|
||||
options(knitr.table.format = function() {
|
||||
if (knitr::is_latex_output()) "latex" else "pandoc"
|
||||
})
|
||||
```
|
||||
|
||||
```{r require_lib, echo = FALSE, include=FALSE, warning=FALSE}
|
||||
require("tidyverse")
|
||||
require("knitr")
|
||||
devtools::load_all("R/")
|
||||
source("real_data/temporary_plot.R")
|
||||
```
|
||||
|
||||
```{r pretty_matrix_print, echo = FALSE, warning=FALSE}
|
||||
# Define a generic method that transforms an object x in a LaTeX string
|
||||
as_latex <- function(x, ...) {
|
||||
UseMethod("as_latex", x)
|
||||
}
|
||||
|
||||
# Define a class latex for LaTeX expressions
|
||||
as_latex.character <- function(x) {
|
||||
structure(
|
||||
paste(x, collapse = " "),
|
||||
class = c("latex", "character")
|
||||
)
|
||||
}
|
||||
|
||||
# A character string of class latex is rendered in display mode
|
||||
# Define a knit_print() method for the latex class
|
||||
knit_print.latex <- function(x, ...) {
|
||||
knitr::asis_output(
|
||||
paste0("$$", x, "$$")
|
||||
)
|
||||
}
|
||||
|
||||
# Now, define a method as_latex for matrix
|
||||
as_latex.matrix <- function(x, ...) {
|
||||
as_latex(c(
|
||||
"\\begin{bmatrix}",
|
||||
paste(
|
||||
t(x),
|
||||
rep(c(rep("&", nrow(x) - 1), "\\\\"), ncol(x)),
|
||||
collapse = ""
|
||||
),
|
||||
"\\end{bmatrix}"
|
||||
))
|
||||
}
|
||||
|
||||
# Indicate to knitr that matrix are rendered as latex
|
||||
knit_print.matrix <- function(x, ...) {
|
||||
knitr::knit_print(as_latex(round(x, 2)))
|
||||
}
|
||||
```
|
||||
|
||||
```{r better_collection_extraction, echo = FALSE, warning=FALSE}
|
||||
extract_unlist_reorder <- function(clustering_data_path) {
|
||||
clustering <- readRDS(clustering_data_path)
|
||||
if (!is.list(clustering)) {
|
||||
clustering <- list(clustering)
|
||||
}
|
||||
best_partition <- extract_best_bipartite_partition(clustering)
|
||||
best_partition_unlist <- unlist(best_partition)
|
||||
if (!is.list(best_partition_unlist)) {
|
||||
best_partition_unlist <- list(best_partition_unlist)
|
||||
}
|
||||
size <- length(best_partition_unlist)
|
||||
return(setNames(lapply(best_partition_unlist, function(collection) {
|
||||
reorder_parameters(collection)
|
||||
}), paste(rep("Collection", size), seq_len(size))))
|
||||
}
|
||||
|
||||
meso_print <- function(unlisted_partition) {
|
||||
for (idx in seq_along(unlisted_partition)) {
|
||||
cat(paste("\n\n\n\nPour la collection", idx, "\n\n"))
|
||||
print(plot(unlisted_partition[[idx]], type = "meso", mixture = TRUE))
|
||||
cat("\n\n")
|
||||
print(knitr::kable(unlisted_partition[[idx]]$net_id, col.names = "Networks"))
|
||||
}
|
||||
}
|
||||
|
||||
alpha_print <- function(unlisted_partition) {
|
||||
for (idx in seq_along(unlisted_partition)) {
|
||||
cat(paste("\nPour la collection ", idx, ":\n"))
|
||||
cat(knitr::knit_print(unlisted_partition[[idx]]$alpha))
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
```{r taxonomy_functions, echo = FALSE, warning=FALSE}
|
||||
interaction_data <- read.table(file = "real_data/data/interaction-data.txt", sep = "\t", header = TRUE)
|
||||
|
||||
insect_orders <- unique(interaction_data$insectorder)
|
||||
plant_family <- unique(interaction_data$plantorder)
|
||||
|
||||
insect_orders[is.na(insect_orders)] <- "NA"
|
||||
plant_family[is.na(plant_family)] <- "NA"
|
||||
### Matching taxonomy
|
||||
taxonomy_in_clusters <- function(unlisted_model) {
|
||||
if (is.list(unlisted_model)) {
|
||||
lapply(seq_len(length(unlisted_model)), function(col_idx) {
|
||||
# Per collection
|
||||
# Empty init
|
||||
insect_count <- t(sapply(insect_orders, function(order) {
|
||||
out_count <- rep(0, unlisted_model[[col_idx]]$Q[2])
|
||||
out_count
|
||||
}))
|
||||
|
||||
plant_count <- t(sapply(plant_family, function(order) {
|
||||
out_count <- rep(0, unlisted_model[[col_idx]]$Q[1])
|
||||
out_count
|
||||
}))
|
||||
|
||||
for (m in seq.int(unlisted_model[[col_idx]]$M)) {
|
||||
#### Insect
|
||||
insect_names <- names(unlisted_model[[col_idx]]$Z[[1]][[2]])
|
||||
|
||||
insect_count <- insect_count + t(sapply(insect_orders, function(order) {
|
||||
out_count <- rep(0, unlisted_model[[col_idx]]$Q[2])
|
||||
names(out_count) <- seq.int(unlisted_model[[col_idx]]$Q[2])
|
||||
insect_count <- table(unlisted_model[[col_idx]]$Z[[m]][[2]][grep(order, insect_names)])
|
||||
out_count[names(insect_count)] <- insect_count
|
||||
out_count
|
||||
}))
|
||||
#### Plants
|
||||
plant_names <- names(unlisted_model[[col_idx]]$Z[[1]][[1]])
|
||||
|
||||
plant_count <- t(sapply(plant_family, function(order) {
|
||||
out_count <- rep(0, unlisted_model[[col_idx]]$Q[1])
|
||||
names(out_count) <- seq.int(unlisted_model[[col_idx]]$Q[1])
|
||||
plant_count <- table(unlisted_model[[col_idx]]$Z[[m]][[1]][grep(order, plant_names)])
|
||||
out_count[names(plant_count)] <- plant_count
|
||||
out_count
|
||||
}))
|
||||
}
|
||||
return(list(insects = insect_count, plants = plant_count))
|
||||
})
|
||||
} else {
|
||||
# Per collection
|
||||
# Empty init
|
||||
insect_count <- t(sapply(insect_orders, function(order) {
|
||||
out_count <- rep(0, unlisted_model$Q[2])
|
||||
out_count
|
||||
}))
|
||||
|
||||
plant_count <- t(sapply(plant_family, function(order) {
|
||||
out_count <- rep(0, unlisted_model$Q[1])
|
||||
out_count
|
||||
}))
|
||||
|
||||
for (m in seq.int(unlisted_model$M)) {
|
||||
#### Insect
|
||||
insect_names <- names(unlisted_model$Z[[1]][[2]])
|
||||
|
||||
insect_count <- insect_count + t(sapply(insect_orders, function(order) {
|
||||
out_count <- rep(0, unlisted_model$Q[2])
|
||||
names(out_count) <- seq.int(unlisted_model$Q[2])
|
||||
insect_count <- table(unlisted_model$Z[[m]][[2]][grep(order, insect_names)])
|
||||
out_count[names(insect_count)] <- insect_count
|
||||
out_count
|
||||
}))
|
||||
#### Plants
|
||||
plant_names <- names(unlisted_model$Z[[1]][[1]])
|
||||
|
||||
plant_count <- t(sapply(plant_family, function(order) {
|
||||
out_count <- rep(0, unlisted_model$Q[1])
|
||||
names(out_count) <- seq.int(unlisted_model$Q[1])
|
||||
plant_count <- table(unlisted_model$Z[[m]][[1]][grep(order, plant_names)])
|
||||
out_count[names(plant_count)] <- plant_count
|
||||
out_count
|
||||
}))
|
||||
}
|
||||
return(list(list(insects = insect_count, plants = plant_count)))
|
||||
}
|
||||
}
|
||||
|
||||
taxonomy_remove_empty <- function(taxonomy_collections_list) {
|
||||
lapply(taxonomy_collections_list, function(collection) {
|
||||
list(
|
||||
insects = collection$insects[which(rowSums(collection$insects != 0) > 0), ],
|
||||
plants = collection$plants[which(rowSums(collection$plants != 0) > 0), ]
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
get_formatted_data <- function(collection, group, max_rank = 6) {
|
||||
collection[[group]] %>%
|
||||
as.data.frame() %>% # Transformation en data frame
|
||||
mutate(
|
||||
Total = rowSums(.),
|
||||
Rang = rank(-Total, ties.method = "min")
|
||||
) %>% # Creation d'une colonne Total
|
||||
rownames_to_column(var = "TaxonBrut") %>%
|
||||
mutate(Taxon = ifelse(Rang <= max_rank & Total > 0, TaxonBrut, "Other")) %>%
|
||||
arrange(Rang) %>%
|
||||
mutate(Taxon = factor(Taxon, levels = unique(Taxon))) %>%
|
||||
select(-Total, -TaxonBrut, -Rang) %>%
|
||||
pivot_longer(
|
||||
cols = -c("Taxon"),
|
||||
names_to = "Bloc",
|
||||
values_to = "Nombre",
|
||||
names_prefix = "V"
|
||||
) %>%
|
||||
group_by(Taxon, Bloc) %>%
|
||||
summarise_all(sum) %>%
|
||||
ungroup() %>%
|
||||
group_by(Bloc) %>%
|
||||
mutate(Proportion = Nombre / sum(Nombre)) %>%
|
||||
ungroup() %>%
|
||||
mutate(Group = group)
|
||||
}
|
||||
|
||||
taxonomy_plot <- function(data, insects_or_plants, model, stack_or_fill) {
|
||||
plots <- filter(data, Group == insects_or_plants) %>%
|
||||
ggplot(aes(x = Bloc, y = Nombre, fill = Taxon)) +
|
||||
geom_bar(stat = "identity", position = stack_or_fill) +
|
||||
labs(x = "Block", y = "Number of Nodes", fill = "Taxonomy") +
|
||||
theme_minimal() +
|
||||
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
|
||||
ggtitle(paste(
|
||||
ifelse(insects_or_plants == "insects",
|
||||
"Pollinators", "Plants"
|
||||
), "repartition (",
|
||||
ifelse(stack_or_fill == "stack", "absolute", "proportion"),
|
||||
") in the", model, "clustering"
|
||||
)) +
|
||||
facet_wrap(~Collection, ncol = 3, scales = "free_x")
|
||||
|
||||
# Arrange the plots in a grid layout
|
||||
gridExtra::grid.arrange(plots, newpage = TRUE)
|
||||
}
|
||||
```
|
||||
|
||||
```{r load data, echo = FALSE, include = FALSE, warning=FALSE}
|
||||
# All results
|
||||
iid_unlist <- extract_unlist_reorder("real_data/data/dore_collection_clustering_nb_run1_iid_123networks_24-05-23-21:40:42.Rds")
|
||||
|
||||
rho_unlist <- extract_unlist_reorder("real_data/data/dore_collection_clustering_nb_run1_rho_123networks_25-05-23-13:58:30.Rds")
|
||||
|
||||
pi_unlist <- extract_unlist_reorder("real_data/data/dore_collection_clustering_nb_run1_pi_123networks_25-05-23-17:31:25.Rds")
|
||||
|
||||
pirho_unlist <- extract_unlist_reorder("real_data/data/dore_collection_clustering_nb_run1_pirho_123networks_26-05-23-19:22:55.Rds")
|
||||
```
|
||||
|
||||
## Clustering avec le modèle iid
|
||||
Avec le modèle *iid* nous obtenons les `r length(iid_unlist)` collections et
|
||||
les structures suivantes:
|
||||
|
||||
```{r iid_meso_plot, echo = FALSE, message=FALSE, results="asis", fig.cap=paste(names(iid_unlist), rep("- iid",length(iid_unlist))), warning=FALSE}
|
||||
meso_print(iid_unlist)
|
||||
```
|
||||
|
||||
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
|
||||
|
||||
```{r iid_alpha, echo = FALSE, results="asis", warning=FALSE}
|
||||
alpha_print(iid_unlist)
|
||||
```
|
||||
### Comparaison avec des infos supplémentaires
|
||||
```{r supinfo, echo = FALSE}
|
||||
supinfo <- readxl::read_xlsx("real_data/data/supinfo.xlsx", sheet = 2)
|
||||
interaction_data <- read.table(file = "real_data/data/interaction-data.txt", sep = "\t", header = TRUE)
|
||||
|
||||
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
|
||||
incidence_matrices <- readRDS(file = "real_data/data/dore-matrices.Rds")
|
||||
names_aggreg_networks <- names(incidence_matrices)
|
||||
vectorClusteringNet <- numeric(nrow(supinfo))
|
||||
for (k in 1:length(iid_unlist)) {
|
||||
idclust <- match(iid_unlist[[k]]$net_id, names_aggreg_networks)
|
||||
supinfoclust <- match(seq_ids_network_aggreg[idclust], supinfo$Idweb)
|
||||
vectorClusteringNet[supinfoclust] <- k
|
||||
}
|
||||
```
|
||||
|
||||
```{r Annual_timespan_plot, echo = FALSE}
|
||||
ggplot(supinfo) +
|
||||
aes(
|
||||
y = Annual_time_span,
|
||||
x = vectorClusteringNet, group = vectorClusteringNet, fill = as.factor(vectorClusteringNet)
|
||||
) +
|
||||
xlab("Numéro de collection") +
|
||||
ylab("Annual time span") +
|
||||
guides(fill = guide_legend(title = "Numéro\nde collection")) +
|
||||
geom_boxplot()
|
||||
```
|
||||
|
||||
### Répartition dans les clusters selon la taxonomie
|
||||
```{r iid_taxonomy, echo = FALSE, warning=FALSE}
|
||||
iid_taxonomy <- taxonomy_in_clusters(iid_unlist)
|
||||
|
||||
iid_taxonomy_long <- map_dfr(iid_taxonomy,
|
||||
function(collection) {
|
||||
map_dfr(
|
||||
c("insects", "plants"),
|
||||
function(group) {
|
||||
get_formatted_data(collection, group)
|
||||
}
|
||||
)
|
||||
},
|
||||
.id = "Collection"
|
||||
)
|
||||
```
|
||||
|
||||
```{r iid_plot_taxonomy_pollinators, echo = FALSE, message = FALSE,fig.cap = 'Pollinators repartition for the iid model regarding taxonomy', warning=FALSE}
|
||||
# Pollinators
|
||||
taxonomy_plot(
|
||||
data = iid_taxonomy_long,
|
||||
insects_or_plants = "insects",
|
||||
stack_or_fill = "stack", model = "iid"
|
||||
)
|
||||
taxonomy_plot(
|
||||
data = iid_taxonomy_long,
|
||||
insects_or_plants = "insects",
|
||||
stack_or_fill = "fill", model = "iid"
|
||||
)
|
||||
```
|
||||
|
||||
```{r iid_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the iid model regarding taxonomy', warning=FALSE}
|
||||
taxonomy_plot(
|
||||
data = iid_taxonomy_long,
|
||||
insects_or_plants = "plants",
|
||||
stack_or_fill = "stack", model = "iid"
|
||||
)
|
||||
taxonomy_plot(
|
||||
data = iid_taxonomy_long,
|
||||
insects_or_plants = "plants",
|
||||
stack_or_fill = "fill", model = "iid"
|
||||
)
|
||||
```
|
||||
|
||||
#### Tables
|
||||
```{r iid_taxo_tables, echo = FALSE}
|
||||
iid_taxonomy_long %>%
|
||||
filter(Group == "insects") %>%
|
||||
group_by(Taxon) %>%
|
||||
mutate(row_id = row_number()) %>%
|
||||
pivot_wider(names_from = c(Collection, Bloc), values_from = Nombre, names_glue = "Collection_{Collection}_Bloc_{Bloc}") %>%
|
||||
ungroup() %>%
|
||||
select(-row_id) %>%
|
||||
group_by(Taxon) %>%
|
||||
summarize(across(starts_with("Collection"), ~ na.omit(.)[1])) %>%
|
||||
ungroup()
|
||||
```
|
||||
|
||||
|
||||
## Clustering avec le modèle pi
|
||||
Avec le modèle *pi* nous obtenons les `r length(pi_unlist)` collections et
|
||||
les structures suivantes:
|
||||
|
||||
```{r pi_meso_plot, echo = FALSE, message=FALSE, fig.cap=paste(names(pi_unlist), rep("- pi",length(pi_unlist))), results="asis", warning=FALSE}
|
||||
meso_print(pi_unlist)
|
||||
```
|
||||
|
||||
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
|
||||
|
||||
```{r pi_alpha, echo = FALSE, results="asis", warning=FALSE}
|
||||
alpha_print(pi_unlist)
|
||||
```
|
||||
|
||||
### Répartition dans les clusters selon la taxonomie
|
||||
```{r pi_taxonomy, echo = FALSE, warning=FALSE}
|
||||
pi_taxonomy <- taxonomy_in_clusters(pi_unlist)
|
||||
|
||||
pi_taxonomy_long <- map_dfr(pi_taxonomy,
|
||||
function(collection) {
|
||||
map_dfr(
|
||||
c("insects", "plants"),
|
||||
function(group) {
|
||||
get_formatted_data(collection, group)
|
||||
}
|
||||
)
|
||||
},
|
||||
.id = "Collection"
|
||||
)
|
||||
```
|
||||
|
||||
```{r pi_plot_taxonomy_pollinators, echo = FALSE, message = FALSE,fig.cap = 'Pollinators repartition for the pi model regarding taxonomy', warning=FALSE}
|
||||
# Pollinators
|
||||
taxonomy_plot(
|
||||
data = pi_taxonomy_long,
|
||||
insects_or_plants = "insects",
|
||||
stack_or_fill = "stack", model = "pi"
|
||||
)
|
||||
taxonomy_plot(
|
||||
data = pi_taxonomy_long,
|
||||
insects_or_plants = "insects",
|
||||
stack_or_fill = "fill", model = "pi"
|
||||
)
|
||||
```
|
||||
|
||||
```{r pi_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the pi model regarding taxonomy', warning=FALSE}
|
||||
taxonomy_plot(
|
||||
data = pi_taxonomy_long,
|
||||
insects_or_plants = "plants",
|
||||
stack_or_fill = "stack", model = "pi"
|
||||
)
|
||||
taxonomy_plot(
|
||||
data = pi_taxonomy_long,
|
||||
insects_or_plants = "plants",
|
||||
stack_or_fill = "fill", model = "pi"
|
||||
)
|
||||
```
|
||||
|
||||
## Clustering avec le modèle rho
|
||||
Avec le modèle *rho* nous obtenons les `r length(rho_unlist)` collections et
|
||||
les structures suivantes:
|
||||
|
||||
```{r rho_meso_plot, echo = FALSE, message=FALSE, fig.cap=paste(names(rho_unlist), rep("- rho",length(rho_unlist))), results="asis", warning=FALSE}
|
||||
meso_print(rho_unlist)
|
||||
```
|
||||
|
||||
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
|
||||
|
||||
```{r rho_alpha, echo = FALSE, results="asis", warning=FALSE}
|
||||
alpha_print(rho_unlist)
|
||||
```
|
||||
|
||||
### Répartition dans les clusters selon la taxonomie
|
||||
```{r rho_taxonomy, echo = FALSE, warning=FALSE}
|
||||
rho_taxonomy <- taxonomy_in_clusters(rho_unlist)
|
||||
|
||||
rho_taxonomy_long <- map_dfr(rho_taxonomy,
|
||||
function(collection) {
|
||||
map_dfr(
|
||||
c("insects", "plants"),
|
||||
function(group) {
|
||||
get_formatted_data(collection, group)
|
||||
}
|
||||
)
|
||||
},
|
||||
.id = "Collection"
|
||||
)
|
||||
```
|
||||
|
||||
```{r rho_plot_taxonomy_pollinators, echo = FALSE, message = FALSE,fig.cap = 'Pollinators repartition for the rho model regarding taxonomy', warning=FALSE}
|
||||
# Pollinators
|
||||
taxonomy_plot(
|
||||
data = rho_taxonomy_long,
|
||||
insects_or_plants = "insects",
|
||||
stack_or_fill = "stack", model = "rho"
|
||||
)
|
||||
taxonomy_plot(
|
||||
data = rho_taxonomy_long,
|
||||
insects_or_plants = "insects",
|
||||
stack_or_fill = "fill", model = "rho"
|
||||
)
|
||||
```
|
||||
|
||||
```{r rho_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the rho model regarding taxonomy', warning=FALSE}
|
||||
taxonomy_plot(
|
||||
data = rho_taxonomy_long,
|
||||
insects_or_plants = "plants",
|
||||
stack_or_fill = "stack", model = "rho"
|
||||
)
|
||||
taxonomy_plot(
|
||||
data = rho_taxonomy_long,
|
||||
insects_or_plants = "plants",
|
||||
stack_or_fill = "fill", model = "rho"
|
||||
)
|
||||
```
|
||||
|
||||
## Clustering avec le modèle pirho
|
||||
Avec le modèle *pirho* nous obtenons les `r length(pirho_unlist)` collections et
|
||||
les structures suivantes:
|
||||
|
||||
```{r pirho_meso_plot, echo = FALSE, message=FALSE, fig.cap=paste(names(pirho_unlist), rep("- pirho",length(pirho_unlist))), results="asis", warning=FALSE}
|
||||
meso_print(pirho_unlist)
|
||||
```
|
||||
|
||||
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
|
||||
|
||||
```{r pirho_alpha, echo = FALSE, results="asis", warning=FALSE}
|
||||
alpha_print(pirho_unlist)
|
||||
```
|
||||
|
||||
### Répartition dans les clusters selon la taxonomie
|
||||
```{r pirho_taxonomy, echo = FALSE, warning=FALSE}
|
||||
pirho_taxonomy <- taxonomy_in_clusters(pirho_unlist)
|
||||
|
||||
pirho_taxonomy_long <- map_dfr(pirho_taxonomy,
|
||||
function(collection) {
|
||||
map_dfr(
|
||||
c("insects", "plants"),
|
||||
function(group) {
|
||||
get_formatted_data(collection, group)
|
||||
}
|
||||
)
|
||||
},
|
||||
.id = "Collection"
|
||||
)
|
||||
```
|
||||
|
||||
```{r pirho_plot_taxonomy_pollinators, echo = FALSE, message = FALSE, fig.cap = 'Pollinators repartition for the pirho model regarding taxonomy', warning=FALSE}
|
||||
# Pollinators
|
||||
taxonomy_plot(
|
||||
data = pirho_taxonomy_long,
|
||||
insects_or_plants = "insects",
|
||||
stack_or_fill = "stack", model = "pirho"
|
||||
)
|
||||
taxonomy_plot(
|
||||
data = pirho_taxonomy_long,
|
||||
insects_or_plants = "insects",
|
||||
stack_or_fill = "fill", model = "pirho"
|
||||
)
|
||||
```
|
||||
|
||||
```{r pirho_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the pirho model regarding taxonomy', warning=FALSE}
|
||||
taxonomy_plot(
|
||||
data = pirho_taxonomy_long,
|
||||
insects_or_plants = "plants",
|
||||
stack_or_fill = "stack", model = "pirho"
|
||||
)
|
||||
taxonomy_plot(
|
||||
data = pirho_taxonomy_long,
|
||||
insects_or_plants = "plants",
|
||||
stack_or_fill = "fill", model = "pirho"
|
||||
)
|
||||
```
|
||||
2691
Rcodes/real_data/presentation_dore.html
Normal file
2691
Rcodes/real_data/presentation_dore.html
Normal file
File diff suppressed because one or more lines are too long
BIN
Rcodes/real_data/presentation_dore.pdf
Normal file
BIN
Rcodes/real_data/presentation_dore.pdf
Normal file
Binary file not shown.
1503
Rcodes/real_data/presentation_dore.tex
Normal file
1503
Rcodes/real_data/presentation_dore.tex
Normal file
File diff suppressed because it is too large
Load diff
52
Rcodes/real_data/references.bib
Normal file
52
Rcodes/real_data/references.bib
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
@misc{anakokDisentanglingStructureEcological2022,
|
||||
title = {Disentangling the Structure of Ecological Bipartite Networks from Observation Processes},
|
||||
author = {Anakok, Emre and Barbillon, Pierre and Fontaine, Colin and Thebault, Elisa},
|
||||
year = {2022},
|
||||
month = nov,
|
||||
number = {arXiv:2211.16364},
|
||||
eprint = {2211.16364},
|
||||
primaryclass = {stat},
|
||||
publisher = {{arXiv}},
|
||||
urldate = {2023-06-14},
|
||||
abstract = {The structure of a bipartite interaction network can be described by providing a clustering for each of the two types of nodes. Such clusterings are outputted by fitting a Latent Block Model (LBM) on an observed network that comes from a sampling of species interactions in the field. However, the sampling is limited and possibly uneven. This may jeopardize the fit of the LBM and then the description of the structure of the network by detecting structures which result from the sampling and not from actual underlying ecological phenomena. If the observed interaction network consists of a weighted bipartite network where the number of observed interactions between two species is available, the sampling efforts for all species can be estimated and used to correct the LBM fit. We propose to combine an observation model that accounts for sampling and an LBM for describing the structure of underlying possible ecological interactions. We develop an original inference procedure for this model, the efficiency of which is demonstrated in simulation studies. The practical interest in ecology of our model is highlighted on a large dataset of plant-pollinator network.},
|
||||
archiveprefix = {arxiv},
|
||||
langid = {english},
|
||||
keywords = {Statistics - Methodology},
|
||||
file = {/home/polarolouis/Zotero/storage/LQ3FINZG/Anakok et al. - 2022 - Disentangling the structure of ecological bipartit.pdf}
|
||||
}
|
||||
|
||||
@article{celisseConsistencyMaximumlikelihoodVariational2012,
|
||||
title = {Consistency of Maximum-Likelihood and Variational Estimators in the Stochastic Block Model},
|
||||
author = {Celisse, Alain and Daudin, Jean-Jacques and Pierre, Laurent},
|
||||
year = {2012},
|
||||
month = jan,
|
||||
journal = {Electronic Journal of Statistics},
|
||||
volume = {6},
|
||||
number = {none},
|
||||
pages = {1847--1899},
|
||||
publisher = {{Institute of Mathematical Statistics and Bernoulli Society}},
|
||||
issn = {1935-7524, 1935-7524},
|
||||
doi = {10.1214/12-EJS729},
|
||||
urldate = {2023-06-06},
|
||||
abstract = {The stochastic block model (SBM) is a probabilistic model designed to describe heterogeneous directed and undirected graphs. In this paper, we address the asymptotic inference in SBM by use of maximum-likelihood and variational approaches. The identifiability of SBM is proved while asymptotic properties of maximum-likelihood and variational estimators are derived. In particular, the consistency of these estimators is settled for the probability of an edge between two vertices (and for the group proportions at the price of an additional assumption), which is to the best of our knowledge the first result of this type for variational estimators in random graphs.},
|
||||
keywords = {62E17,62G05,62G20,62H30,Concentration inequalities,consistency,maximum likelihood estimators,Random graphs,Stochastic block model,variational estimators},
|
||||
file = {/home/polarolouis/Zotero/storage/JNWRIYKG/celisse2012.pdf.pdf;/home/polarolouis/Zotero/storage/XG463B5I/Celisse et al. - 2012 - Consistency of maximum-likelihood and variational .pdf}
|
||||
}
|
||||
|
||||
@misc{chabert-liddellLearningCommonStructures2023,
|
||||
type = {Article},
|
||||
title = {Learning Common Structures in a Collection of Networks. {{An}} Application to Food Webs},
|
||||
author = {{Chabert-Liddell}, Saint-Clair and Barbillon, Pierre and Donnet, Sophie},
|
||||
year = {2023},
|
||||
month = mar,
|
||||
number = {arXiv:2206.00560},
|
||||
eprint = {2206.00560},
|
||||
primaryclass = {stat},
|
||||
publisher = {{arXiv}},
|
||||
doi = {10.48550/arXiv.2206.00560},
|
||||
urldate = {2023-05-22},
|
||||
abstract = {Let a collection of networks represent interactions within several (social or ecological) systems. We pursue two objectives: identifying similarities in the topological structures that are held in common between the networks and clustering the collection into sub-collections of structurally homogeneous networks. We tackle these two questions with a probabilistic model based approach. We propose an extension of the Stochastic Block Model (SBM) adapted to the joint modeling of a collection of networks. The networks in the collection are assumed to be independent realizations of SBMs. The common connectivity structure is imposed through the equality of some parameters. The model parameters are estimated with a variational Expectation-Maximization (EM) algorithm. We derive an ad-hoc penalized likelihood criterion to select the number of blocks and to assess the adequacy of the consensus found between the structures of the different networks. This same criterion can also be used to cluster networks on the basis of their connectivity structure. It thus provides a partition of the collection into subsets of structurally homogeneous networks. The relevance of our proposition is assessed on two collections of ecological networks. First, an application to three stream food webs reveals the homogeneity of their structures and the correspondence between groups of species in different ecosystems playing equivalent ecological roles. Moreover, the joint analysis allows a finer analysis of the structure of smaller networks. Second, we cluster 67 food webs according to their connectivity structures and demonstrate that five mesoscale structures are sufficient to describe this collection.},
|
||||
archiveprefix = {arxiv},
|
||||
keywords = {Statistics - Applications,Statistics - Methodology},
|
||||
file = {/home/polarolouis/Zotero/storage/M74TXGCF/Chabert-Liddell et al. - 2023 - Learning common structures in a collection of netw.pdf;/home/polarolouis/Zotero/storage/A35M8KNP/2206.html}
|
||||
}
|
||||
197
Rcodes/real_data/temporary_plot.R
Normal file
197
Rcodes/real_data/temporary_plot.R
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
#' The function to plot the fitBipartite objects
|
||||
#' @importFrom patchwork
|
||||
#' @importFrom reshape2
|
||||
#' @importFrom purrr
|
||||
plotFitBipartite <- function(model, type = "graphon", oRow = NULL, oCol = NULL, mixture = FALSE, net_id = NULL, ...) {
|
||||
# The below order use mean over all networks to have a consistent display
|
||||
if (is.null(oRow)) {
|
||||
if (model$Q[2] == 1) {
|
||||
mean_rho <- 1
|
||||
} else {
|
||||
mean_rho <- matrixStats::rowMeans2(sapply(model$pi, function(pi) pi[[2]]))
|
||||
}
|
||||
oRow <- order(model$alpha %*% mean_rho, decreasing = TRUE)
|
||||
}
|
||||
if (is.null(oCol)) {
|
||||
if (model$Q[1] == 1) {
|
||||
mean_pi <- 1
|
||||
} else {
|
||||
mean_pi <- matrixStats::rowMeans2(sapply(model$pi, function(pi) pi[[1]]))
|
||||
}
|
||||
oCol <- order(mean_pi %*% model$alpha, decreasing = TRUE)
|
||||
}
|
||||
p <- switch(type,
|
||||
graphon = {
|
||||
(model$alpha[oRow, oCol] * mean(model$delta)) %>%
|
||||
t() %>%
|
||||
reshape2::melt() %>%
|
||||
dplyr::mutate(
|
||||
xmin = rep(c(0, cumsum(model$pi[[net_id]][[2]][oCol][1:(model$Q[2] - 1)])), model$Q[1]),
|
||||
ymin = rep(c(0, cumsum(model$pi[[net_id]][[1]][oRow][1:(model$Q[1] - 1)])), each = model$Q[2]),
|
||||
xmax = rep(cumsum(model$pi[[net_id]][[2]][oCol]), model$Q[1]),
|
||||
ymax = rep(cumsum(model$pi[[net_id]][[1]][oRow]), each = model$Q[2])
|
||||
) %>%
|
||||
ggplot2::ggplot(ggplot2::aes(
|
||||
xmin = xmin, ymin = ymin,
|
||||
xmax = xmax, ymax = ymax, fill = value
|
||||
)) +
|
||||
ggplot2::geom_rect() +
|
||||
ggplot2::scale_fill_gradient2("alpha", low = "white", mid = "red", midpoint = 1) +
|
||||
ggplot2::geom_hline(yintercept = cumsum(model$pi[[net_id]][[1]][oRow][1:(model$Q[1] - 1)]), size = .2) +
|
||||
ggplot2::geom_vline(xintercept = cumsum(model$pi[[net_id]][[2]][oCol][1:(model$Q[2] - 1)]), size = .2) +
|
||||
ggplot2::scale_y_reverse() +
|
||||
ggplot2::theme_bw(base_size = 15, base_rect_size = 1, base_line_size = 1) +
|
||||
ggplot2::xlab("Column Blocks") +
|
||||
ggplot2::ylab("Row Blocks") +
|
||||
ggplot2::coord_equal(expand = FALSE)
|
||||
},
|
||||
meso = {
|
||||
p_alpha <- model$alpha[oRow, oCol] %>%
|
||||
t() %>%
|
||||
reshape2::melt() %>%
|
||||
ggplot2::ggplot(ggplot2::aes(x = Var1, y = Var2, fill = value)) +
|
||||
ggplot2::geom_tile() +
|
||||
ggplot2::scale_fill_gradient2("alpha", low = "white", high = "red") +
|
||||
ggplot2::geom_hline(yintercept = seq(model$Q[1]) + .5) +
|
||||
ggplot2::geom_vline(xintercept = seq(model$Q[2]) + .5) +
|
||||
ggplot2::scale_y_reverse() +
|
||||
ggplot2::theme_bw(base_size = 15, base_rect_size = 1, base_line_size = 1) +
|
||||
ggplot2::xlab("") +
|
||||
ggplot2::ylab("") +
|
||||
ggplot2::coord_fixed(expand = FALSE)
|
||||
# scale_y_reverse()
|
||||
if (model$free_density) {
|
||||
xl <- paste(round(model$delta, 1))
|
||||
} else {
|
||||
xl <- ""
|
||||
}
|
||||
df_pi <- purrr::map_dfc(
|
||||
seq_along(model$net_id),
|
||||
function(m) setNames(data.frame(model$pim[[m]][[1]][oRow]), m)
|
||||
)
|
||||
df_rho <- purrr::map_dfc(
|
||||
seq_along(model$net_id),
|
||||
function(m) setNames(data.frame(model$pim[[m]][[2]][oCol]), m)
|
||||
)
|
||||
# names(df_pi) <- model$net_id
|
||||
if (mixture) {
|
||||
p_pi <-
|
||||
df_pi %>%
|
||||
# rename() %>%
|
||||
dplyr::mutate(q = seq(model$Q[1])) %>%
|
||||
tidyr::pivot_longer(cols = -c(q)) %>%
|
||||
mutate(Proportion = value) %>%
|
||||
ggplot2::ggplot(ggplot2::aes(
|
||||
fill = as.factor(q), y = name,
|
||||
x = Proportion
|
||||
)) +
|
||||
ggplot2::geom_col() +
|
||||
ggplot2::coord_flip(expand = FALSE) +
|
||||
ggplot2::scale_fill_brewer("Row block",
|
||||
type = "qual", palette = "Paired",
|
||||
direction = -1
|
||||
) +
|
||||
ggplot2::guides(fill = ggplot2::guide_legend(
|
||||
ncol = model$Q[1] %/% 3 + 1,
|
||||
byrow = TRUE
|
||||
)) +
|
||||
ggplot2::ylab("") +
|
||||
ggplot2::ylab(xl) +
|
||||
ggplot2::theme_bw(base_size = 15)
|
||||
p_rho <- df_rho %>%
|
||||
# rename() %>%
|
||||
dplyr::mutate(q = seq(model$Q[2])) %>%
|
||||
tidyr::pivot_longer(cols = -c(q)) %>%
|
||||
mutate(Proportion = value) %>%
|
||||
ggplot2::ggplot(ggplot2::aes(
|
||||
fill = as.factor(q), y = name,
|
||||
x = Proportion
|
||||
)) +
|
||||
ggplot2::geom_col() +
|
||||
# ggplot2::coord_flip(expand = FALSE) +
|
||||
ggplot2::scale_fill_brewer("Column block",
|
||||
type = "qual", palette = "Set2",
|
||||
direction = -1
|
||||
) +
|
||||
ggplot2::guides(fill = ggplot2::guide_legend(
|
||||
ncol = model$Q[2] %/% 3 + 1,
|
||||
byrow = TRUE
|
||||
)) +
|
||||
ggplot2::ylab("") +
|
||||
ggplot2::ylab(xl) +
|
||||
ggplot2::theme_bw(base_size = 15)
|
||||
# Merging the plots with patchwork
|
||||
mixture_layout <- "
|
||||
##CCCC
|
||||
##CCCC
|
||||
RRAAAA
|
||||
RRAAAA
|
||||
RRAAAA
|
||||
"
|
||||
p_alpha <- patchwork::wrap_plots(
|
||||
R = p_pi, C = p_rho, A = p_alpha,
|
||||
design = mixture_layout
|
||||
) +
|
||||
patchwork::plot_layout(
|
||||
guides = "collect",
|
||||
design = mixture_layout
|
||||
)
|
||||
}
|
||||
return(p_alpha)
|
||||
},
|
||||
"block" = {
|
||||
as.matrix(model$A[[net_id]])[
|
||||
order(model$Z[[net_id]][[1]]),
|
||||
order(model$Z[[net_id]][[2]])
|
||||
] %>%
|
||||
reshape2::melt() %>%
|
||||
ggplot2::ggplot(ggplot2::aes(x = Var2, y = rev(Var1), fill = value)) +
|
||||
ggplot2::geom_tile(show.legend = FALSE) +
|
||||
ggplot2::geom_hline(
|
||||
yintercept = cumsum(tabulate(model$Z[[net_id]][[1]])[model$Q[1]:2]) + .5,
|
||||
col = "red", size = .5
|
||||
) +
|
||||
ggplot2::geom_vline(
|
||||
xintercept = cumsum(tabulate(model$Z[[net_id]][[2]])[1:(model$Q[2] - 1)]) + .5,
|
||||
col = "red", size = .5
|
||||
) +
|
||||
ggplot2::scale_fill_gradient(low = "white", high = "black", na.value = "transparent") +
|
||||
ggplot2::ylab("") +
|
||||
ggplot2::xlab(model$net_id[net_id]) +
|
||||
ggplot2::scale_x_discrete(
|
||||
breaks = ""
|
||||
) +
|
||||
# ggplot2::scale_y_reverse() +
|
||||
ggplot2::scale_y_discrete(
|
||||
breaks = "",
|
||||
guide = ggplot2::guide_axis(angle = 0)
|
||||
) +
|
||||
ggplot2::coord_equal(expand = FALSE) +
|
||||
ggplot2::theme_bw(base_size = 15) +
|
||||
ggplot2::theme(axis.ticks = ggplot2::element_blank())
|
||||
}
|
||||
)
|
||||
return(p)
|
||||
}
|
||||
|
||||
#' Plot matrix summaries of the collection mesoscale structure
|
||||
#'
|
||||
#' @param x a fitBipartiteSBMPop object.
|
||||
#' @param type The type of the plot. Could be "graphon", "meso" or "block".
|
||||
#' @param ord A reordering of the blocks.
|
||||
#' @param mixture Should the block proportions of each network be plotted as
|
||||
#' well?
|
||||
#' @param net_id Use to plot only on network in "graphon" view.
|
||||
#' @param ... Further argument to be passed
|
||||
#' @return A plot, a ggplot2 object.
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
plot.fitBipartiteSBMPop <- function(x, type = "graphon", oRow = NULL, oCol = NULL, mixture = FALSE, net_id = 1, ...) {
|
||||
stopifnot(inherits(x, "fitBipartiteSBMPop"))
|
||||
p <- plotFitBipartite(x,
|
||||
type = type, oRow = oRow, oCol = oCol, mixture = mixture,
|
||||
net_id = net_id, ...
|
||||
)
|
||||
p
|
||||
}
|
||||
178
Rcodes/simulation/NA_robustness_analyse.Rmd
Normal file
178
Rcodes/simulation/NA_robustness_analyse.Rmd
Normal file
|
|
@ -0,0 +1,178 @@
|
|||
---
|
||||
title: "Analyzing the capacity of the colBiSBM to recover structure for missing data from other networks"
|
||||
output:
|
||||
html_document:
|
||||
toc: true
|
||||
theme: journal
|
||||
pdf_document:
|
||||
keep_tex: true
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = TRUE)
|
||||
```
|
||||
|
||||
```{r required_libs, echo = FALSE, include=FALSE}
|
||||
require("ggplot2")
|
||||
require("tidyverse")
|
||||
```
|
||||
|
||||
```{r import_data, echo=FALSE, include=FALSE}
|
||||
NA_robustness_raw <- readRDS("simulation/data/NA_robustness_results-alpha_0.7, 0.4, 0.3, 0.4, 0.2, 0.05, 0.3, 0.05, 0.05-reps-10-14-06-23_17-41.Rds")
|
||||
NA_robustness_df <- NA_robustness_raw %>%
|
||||
mutate(
|
||||
auc_diff = auc_colBiSBM - auc_LBM,
|
||||
ari_row_diff = NA_net_ari_row - LBM_ari_row,
|
||||
ari_col_diff = NA_net_ari_col - LBM_ari_col
|
||||
) %>%
|
||||
group_by(prop_NAs, model) %>%
|
||||
summarise(
|
||||
mean_auc_diff = mean(auc_diff),
|
||||
sd_auc_diff = sd(auc_diff),
|
||||
mean_ari_row_diff = mean(ari_row_diff),
|
||||
sd_ari_row_diff = sd(ari_row_diff),
|
||||
mean_ari_col_diff = mean(ari_col_diff),
|
||||
sd_ari_col_diff = sd(ari_col_diff),
|
||||
mean_LBM_ari_row = mean(LBM_ari_row),
|
||||
sd_LBM_ari_row = sd(LBM_ari_row),
|
||||
mean_LBM_ari_col = mean(LBM_ari_col),
|
||||
sd_LBM_ari_col = sd(LBM_ari_col),
|
||||
mean_NA_net_ari_row = mean(NA_net_ari_row),
|
||||
sd_NA_net_ari_row = sd(NA_net_ari_row),
|
||||
mean_NA_net_ari_col = mean(NA_net_ari_col),
|
||||
sd_NA_net_ari_col = sd(NA_net_ari_col),
|
||||
mean_elapsed_secs = mean(elapsed_secs),
|
||||
sd_elapsed_secs = sd(elapsed_secs)
|
||||
) %>%
|
||||
ungroup()
|
||||
```
|
||||
|
||||
```{r useful_function, echo = FALSE}
|
||||
write_matex2 <- function(x) {
|
||||
if (!is.matrix(x)) {
|
||||
x <- matrix(x)
|
||||
}
|
||||
begin <- "\\begin{bmatrix}"
|
||||
end <- "\\end{bmatrix}"
|
||||
X <-
|
||||
apply(x, 1, function(x) {
|
||||
paste(
|
||||
paste(x, collapse = "&"),
|
||||
"\\\\"
|
||||
)
|
||||
})
|
||||
paste(c(begin, X, end), collapse = "")
|
||||
}
|
||||
```
|
||||
|
||||
# Simulation context
|
||||
|
||||
The idea is to benchmark the capacity of the models when NAs are in the data.
|
||||
|
||||
To do this, we chose the below structure:
|
||||
|
||||
```{r simulation_parameters, echo = FALSE}
|
||||
eps <- 0.05
|
||||
|
||||
M <- 3
|
||||
|
||||
# Defining parameters
|
||||
nr <- 100
|
||||
nc <- 150
|
||||
pir <- c(0.5, 0.3, 0.2)
|
||||
pic <- c(0.5, 0.3, 0.2)
|
||||
alpha <- matrix(c(
|
||||
0.7, 0.4, 0.3,
|
||||
0.4, 0.2, eps,
|
||||
0.3, eps, eps
|
||||
), byrow = TRUE, nrow = length(pir), ncol = length(pic))
|
||||
```
|
||||
|
||||
$$M = `r M`, n_r = `r nr`, n_c = `r nc` \\ \alpha = `r write_matex2(alpha)`
|
||||
\\ \pi = `r write_matex2(pir)` \rho = `r write_matex2(pic)`$$
|
||||
|
||||
With $M$ the number of networks, $n_r$ the number of nodes in row of the incidence
|
||||
matrix, $n_c$ the number of nodes in column, $\alpha$ the connectivity
|
||||
parameters between the row and column clusters. $\pi$ and $\rho$ are
|
||||
the proportion of nodes in the row and columns clusters.
|
||||
|
||||
And set some randomly chosen interactions to NA. The below plots will show the
|
||||
different quality indicators in function of proportion of NAs in the first of
|
||||
the 3 networks.
|
||||
|
||||
# AUC in function of the proportion of NAs
|
||||
|
||||
```{r auc_plots, echo = FALSE}
|
||||
auc_plot <- NA_robustness_df %>% ggplot() +
|
||||
geom_ribbon(aes(ymin = mean_auc_diff - sd_auc_diff, ymax = mean_auc_diff + sd_auc_diff, x = prop_NAs, fill = model), alpha = 0.1) +
|
||||
geom_line(aes(x = prop_NAs, y = mean_auc_diff, color = model)) +
|
||||
geom_point(aes(x = prop_NAs, y = mean_auc_diff, color = model)) +
|
||||
xlab("NA proportion") +
|
||||
ylab("AUC difference (colBiSBM - LBM)") +
|
||||
scale_x_continuous(breaks = seq(0, 0.9, 0.1))
|
||||
|
||||
|
||||
auc_plot
|
||||
```
|
||||
|
||||
```{r auc_, echo = FALSE}
|
||||
auc_plot <- NA_robustness_df %>% ggplot() +
|
||||
geom_ribbon(aes(ymin = mean_auc_diff - sd_auc_diff, ymax = mean_auc_diff + sd_auc_diff, x = prop_NAs, fill = model), alpha = 0.1) +
|
||||
geom_line(aes(x = prop_NAs, y = mean_auc_diff, color = model)) +
|
||||
geom_point(aes(x = prop_NAs, y = mean_auc_diff, color = model)) +
|
||||
xlab("NA proportion") +
|
||||
ylab("AUC difference (colBiSBM - LBM)") +
|
||||
scale_x_continuous(breaks = seq(0, 0.9, 0.1))
|
||||
|
||||
|
||||
auc_plot
|
||||
```
|
||||
<!-- FAUX
|
||||
At $0$, there is no NAs in the $1^{st}$ network so the information is completely
|
||||
retrieved.
|
||||
|
||||
At $0.1$, the AUC is degraded and falls around 0.85 for the *iid, pi and rho*
|
||||
models. The *pirho* model seems to perform slightly better, but it's confidence interval
|
||||
intersects the others.
|
||||
|
||||
For the next values, two "behaviours" can be observed :
|
||||
- the *iid* decreases and seems to stabilize until 0.8.
|
||||
- the *pi, rho and pirho* seem to remain stable until 0.7, from 0.8 they go down.
|
||||
But the *pirho* performs overall the best, staying around 0.86 and with $90\%$
|
||||
NAs in ther first network it gives an AUC of 0.83 !
|
||||
|
||||
The *pi* and *rho* models perform the same (their confidence intervals overlap),
|
||||
and they give an AUC of around 0.83 between $20\%$ to $80\%$ of NAs. The *rho*
|
||||
ends at 0.8 AUC, where the *pi* ends at 0.76.
|
||||
|
||||
The 4 models maintain an AUC over 0.75 for $90%$ -->
|
||||
|
||||
# ARI in function of the proportion of NAs
|
||||
|
||||
```{r ARI_row_plot, echo = FALSE, fig.cap="Difference of ARI for the row clusterings"}
|
||||
ari_row_plot <- NA_robustness_df %>% ggplot() +
|
||||
# ylim(-1, 1) +
|
||||
geom_ribbon(aes(ymin = mean_ari_row_diff - sd_ari_row_diff, ymax = mean_ari_row_diff + sd_ari_row_diff, x = prop_NAs, fill = model), alpha = 0.25) +
|
||||
geom_line(aes(x = prop_NAs, y = mean_ari_row_diff, color = model)) +
|
||||
geom_point(aes(x = prop_NAs, y = mean_ari_row_diff, color = model)) +
|
||||
xlab("NA proportion") +
|
||||
ylab("ARI difference") +
|
||||
ggtitle("ARI on the row clustering, difference (colBiSBM - LBM)") +
|
||||
scale_x_continuous(breaks = seq(0, 0.9, 0.1))
|
||||
|
||||
ari_row_plot
|
||||
```
|
||||
|
||||
```{r ARI_col_plot, echo = FALSE, fig.cap="Difference of ARI for the columns clusterings"}
|
||||
ari_col_plot <- NA_robustness_df %>% ggplot() +
|
||||
# ylim(-1, 1) +
|
||||
geom_ribbon(aes(ymin = mean_ari_col_diff - sd_ari_col_diff, ymax = mean_ari_col_diff + sd_ari_col_diff, x = prop_NAs, fill = model), alpha = 0.25) +
|
||||
geom_line(aes(x = prop_NAs, y = mean_ari_col_diff, color = model)) +
|
||||
geom_point(aes(x = prop_NAs, y = mean_ari_col_diff, color = model)) +
|
||||
xlab("NA proportion") +
|
||||
ylab("ARI difference") +
|
||||
ggtitle("ARI on the column clustering, difference (colBiSBM - LBM)") +
|
||||
scale_x_continuous(breaks = seq(0, 0.9, 0.1))
|
||||
|
||||
ari_col_plot
|
||||
```
|
||||
475
Rcodes/simulation/NA_robustness_analyse.html
Normal file
475
Rcodes/simulation/NA_robustness_analyse.html
Normal file
File diff suppressed because one or more lines are too long
BIN
Rcodes/simulation/NA_robustness_analyse.pdf
Normal file
BIN
Rcodes/simulation/NA_robustness_analyse.pdf
Normal file
Binary file not shown.
110
Rcodes/simulation/NA_robustness_analyse.tex
Normal file
110
Rcodes/simulation/NA_robustness_analyse.tex
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
% Options for packages loaded elsewhere
|
||||
\PassOptionsToPackage{unicode}{hyperref}
|
||||
\PassOptionsToPackage{hyphens}{url}
|
||||
%
|
||||
\documentclass[
|
||||
]{article}
|
||||
\usepackage{lmodern}
|
||||
\usepackage{amssymb,amsmath}
|
||||
\usepackage{ifxetex,ifluatex}
|
||||
\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage[utf8]{inputenc}
|
||||
\usepackage{textcomp} % provide euro and other symbols
|
||||
\else % if luatex or xetex
|
||||
\usepackage{unicode-math}
|
||||
\defaultfontfeatures{Scale=MatchLowercase}
|
||||
\defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1}
|
||||
\fi
|
||||
% Use upquote if available, for straight quotes in verbatim environments
|
||||
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
|
||||
\IfFileExists{microtype.sty}{% use microtype if available
|
||||
\usepackage[]{microtype}
|
||||
\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
|
||||
}{}
|
||||
\makeatletter
|
||||
\@ifundefined{KOMAClassName}{% if non-KOMA class
|
||||
\IfFileExists{parskip.sty}{%
|
||||
\usepackage{parskip}
|
||||
}{% else
|
||||
\setlength{\parindent}{0pt}
|
||||
\setlength{\parskip}{6pt plus 2pt minus 1pt}}
|
||||
}{% if KOMA class
|
||||
\KOMAoptions{parskip=half}}
|
||||
\makeatother
|
||||
\usepackage{xcolor}
|
||||
\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available
|
||||
\IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}}
|
||||
\hypersetup{
|
||||
pdftitle={Analyzing the capacity of the colBiSBM to recover structure for missing data from other networks},
|
||||
hidelinks,
|
||||
pdfcreator={LaTeX via pandoc}}
|
||||
\urlstyle{same} % disable monospaced font for URLs
|
||||
\usepackage[margin=1in]{geometry}
|
||||
\usepackage{graphicx}
|
||||
\makeatletter
|
||||
\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
|
||||
\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
|
||||
\makeatother
|
||||
% Scale images if necessary, so that they will not overflow the page
|
||||
% margins by default, and it is still possible to overwrite the defaults
|
||||
% using explicit options in \includegraphics[width, height, ...]{}
|
||||
\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
|
||||
% Set default figure placement to htbp
|
||||
\makeatletter
|
||||
\def\fps@figure{htbp}
|
||||
\makeatother
|
||||
\setlength{\emergencystretch}{3em} % prevent overfull lines
|
||||
\providecommand{\tightlist}{%
|
||||
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
|
||||
\setcounter{secnumdepth}{-\maxdimen} % remove section numbering
|
||||
|
||||
\title{Analyzing the capacity of the colBiSBM to recover structure for
|
||||
missing data from other networks}
|
||||
\author{}
|
||||
\date{\vspace{-2.5em}}
|
||||
|
||||
\begin{document}
|
||||
\maketitle
|
||||
|
||||
\begin{verbatim}
|
||||
## `summarise()` has grouped output by 'prop_NAs'. You can override using the
|
||||
## `.groups` argument.
|
||||
\end{verbatim}
|
||||
|
||||
\hypertarget{simulation-context}{%
|
||||
\section{Simulation context}\label{simulation-context}}
|
||||
|
||||
The idea is to benchmark the capacity of the models when NAs are in the
|
||||
data.
|
||||
|
||||
To do this, whe choose the below structure: ! PARAMETERS OF THE
|
||||
SIMULATION !
|
||||
|
||||
And set some randomly chosen interactions to NA. The below plots will
|
||||
show the different quality indicators in function of proportion of NAs
|
||||
in the first of the 3 networks.
|
||||
|
||||
\hypertarget{auc-in-function-of-the-proportion-of-nas}{%
|
||||
\section{AUC in function of the proportion of
|
||||
NAs}\label{auc-in-function-of-the-proportion-of-nas}}
|
||||
|
||||
\includegraphics{NA_robustness_analyse_files/figure-latex/auc_plots-1.pdf}
|
||||
|
||||
\hypertarget{ari-in-function-of-the-proportion-of-nas}{%
|
||||
\section{ARI in function of the proportion of
|
||||
NAs}\label{ari-in-function-of-the-proportion-of-nas}}
|
||||
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics{NA_robustness_analyse_files/figure-latex/ARI_row_plot-1.pdf}
|
||||
\caption{Difference of ARI for the row clusterings}
|
||||
\end{figure}
|
||||
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics{NA_robustness_analyse_files/figure-latex/ARI_col_plot-1.pdf}
|
||||
\caption{Difference of ARI for the columns clusterings}
|
||||
\end{figure}
|
||||
|
||||
\end{document}
|
||||
153
Rcodes/simulation/NA_robustness_check.R
Normal file
153
Rcodes/simulation/NA_robustness_check.R
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
devtools::load_all()
|
||||
require("sbm")
|
||||
require("pROC")
|
||||
|
||||
set.seed(1234)
|
||||
|
||||
eps <- 0.05
|
||||
|
||||
M <- 3
|
||||
|
||||
# Defining parameters
|
||||
nr <- 100
|
||||
nc <- 150
|
||||
pir <- c(0.5, 0.3, 0.2)
|
||||
pic <- c(0.5, 0.3, 0.2)
|
||||
alpha <- matrix(c(
|
||||
0.7, 0.4, 0.3,
|
||||
0.4, 0.2, eps,
|
||||
0.3, eps, eps
|
||||
), byrow = TRUE, nrow = length(pir), ncol = length(pic))
|
||||
max_repetition <- 10
|
||||
|
||||
# Collections
|
||||
collections <- list(
|
||||
"iid" = generate_bipartite_collection(nr, nc,
|
||||
pir, pic,
|
||||
alpha, M,
|
||||
model = "iid",
|
||||
return_memberships = TRUE
|
||||
),
|
||||
"pi" = generate_bipartite_collection(nr, nc,
|
||||
pir, pic,
|
||||
alpha, M,
|
||||
model = "pi",
|
||||
return_memberships = TRUE
|
||||
),
|
||||
"rho" = generate_bipartite_collection(nr, nc,
|
||||
pir, pic,
|
||||
alpha, M,
|
||||
model = "rho",
|
||||
return_memberships = TRUE
|
||||
),
|
||||
"pirho" = generate_bipartite_collection(nr, nc,
|
||||
pir, pic,
|
||||
alpha, M,
|
||||
model = "pirho",
|
||||
return_memberships = TRUE
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
conditions <- expand.grid(
|
||||
prop_NAs = seq(from = 0, to = 0.9, by = 0.1),
|
||||
model = c("iid", "pi", "rho", "pirho"),
|
||||
repetition = seq.int(max_repetition)
|
||||
)
|
||||
|
||||
|
||||
|
||||
result_dataframe <- do.call("rbind", bettermc::mclapply(seq_len(nrow(conditions)), function(current) {
|
||||
# Looping over conditions
|
||||
prop_NAs <- conditions[current, ]$prop_NAs
|
||||
model <- as.character(conditions[current, ]$model)
|
||||
bipartite_collection <- collections[[model]]
|
||||
|
||||
# This is a list of the M incidence matrices
|
||||
bipartite_collection_incidence <- lapply(seq.int(M), function(m) {
|
||||
bipartite_collection[[m]]$incidence_matrix
|
||||
})
|
||||
|
||||
# Sampling values to replace by NAs
|
||||
NAs_index <- sample(
|
||||
seq_len(length(bipartite_collection_incidence[[1]])),
|
||||
floor(prop_NAs * length(bipartite_collection_incidence[[1]]))
|
||||
)
|
||||
|
||||
real_val_NAs <- bipartite_collection_incidence[[1]][NAs_index]
|
||||
bipartite_collection_incidence[[1]][NAs_index] <- NA
|
||||
NAs_coordinates <- which(is.na(bipartite_collection_incidence[[1]]),
|
||||
arr.ind = TRUE
|
||||
)
|
||||
|
||||
Z <- lapply(seq.int(M), function(m) {
|
||||
list(
|
||||
bipartite_collection[[m]]$row_blockmemberships,
|
||||
bipartite_collection[[m]]$col_blockmemberships
|
||||
)
|
||||
})
|
||||
|
||||
start_time <- Sys.time()
|
||||
mybisbmpop <- estimate_colBiSBM(
|
||||
netlist = bipartite_collection_incidence, colsbm_model = model,
|
||||
nb_run = 1,
|
||||
global_opts = list(
|
||||
nb_cores = parallel::detectCores() - 1, verbosity = 0
|
||||
), silent_parallelization = TRUE
|
||||
)
|
||||
stop_time <- Sys.time()
|
||||
|
||||
baseline_LBM <- estimate_colBiSBM(
|
||||
netlist = bipartite_collection_incidence[[1]], colsbm_model = model,
|
||||
nb_run = 1,
|
||||
global_opts = list(
|
||||
nb_cores = parallel::detectCores() - 1, verbosity = 0
|
||||
), silent_parallelization = TRUE
|
||||
)
|
||||
|
||||
# Predicted links
|
||||
X_hat_LBM <- baseline_LBM$best_fit$tau[[1]][[1]] %*% baseline_LBM$best_fit$alpha %*% t(baseline_LBM$best_fit$tau[[1]][[2]])
|
||||
X_hat <- mybisbmpop$best_fit$tau[[1]][[1]] %*% mybisbmpop$best_fit$alpha %*% t(mybisbmpop$best_fit$tau[[1]][[2]])
|
||||
|
||||
# Compute ROC and AUC
|
||||
auc_LBM <- auc(c(0, 1, real_val_NAs), c(0, 1, X_hat_LBM[NAs_index]))
|
||||
auc_colBiSBM <- auc(c(0, 1, real_val_NAs), c(0, 1, X_hat[NAs_index]))
|
||||
|
||||
# Computing ARI on the NAs
|
||||
return(data.frame(
|
||||
prop_NAs = prop_NAs,
|
||||
model = model,
|
||||
repetition = conditions[current, ]$repetition,
|
||||
auc_LBM = auc_LBM,
|
||||
auc_colBiSBM = auc_colBiSBM,
|
||||
LBM_ari_row = aricode::ARI(
|
||||
Z[[1]][[1]],
|
||||
baseline_LBM$best_fit$Z[[1]][[1]]
|
||||
),
|
||||
LBM_ari_col = aricode::ARI(
|
||||
Z[[1]][[2]],
|
||||
baseline_LBM$best_fit$Z[[1]][[2]]
|
||||
),
|
||||
NA_net_ari_row = aricode::ARI(
|
||||
Z[[1]][[1]],
|
||||
mybisbmpop$best_fit$Z[[1]][[1]]
|
||||
),
|
||||
NA_net_ari_col = aricode::ARI(
|
||||
Z[[1]][[2]],
|
||||
mybisbmpop$best_fit$Z[[1]][[2]]
|
||||
),
|
||||
elapsed_secs = difftime(stop_time, start_time, units = "sec")
|
||||
))
|
||||
},
|
||||
mc.cores = parallel::detectCores() - 1,
|
||||
mc.progress = TRUE
|
||||
))
|
||||
|
||||
saveRDS(
|
||||
result_dataframe,
|
||||
paste0(
|
||||
"simulation/data/",
|
||||
"NA_robustness_results-alpha_", toString(alpha),
|
||||
"-reps-", max_repetition, "-", format(Sys.time(), "%d-%m-%y_%H-%M"), ".Rds"
|
||||
)
|
||||
)
|
||||
29
Rcodes/simulation/analyze_results.R
Normal file
29
Rcodes/simulation/analyze_results.R
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
library("ggplot2")
|
||||
library("dplyr")
|
||||
|
||||
|
||||
filename <- "divergence_modular_to_nested_with_3_repetitions_29-03-23_13:55:42.Rds"
|
||||
filename <- paste0(getwd(), "/simulation/data/", filename)
|
||||
|
||||
current_data <- readRDS(filename)
|
||||
|
||||
current_data$results <- current_data$results %>% mutate(diff_BICL = BICL - sep_LBM_BICL)
|
||||
|
||||
current_data$results <- current_data$results %>%
|
||||
group_by(Current_M, Network_id, divergence) %>%
|
||||
summarize(mean_diff_BICL = mean(diff_BICL), sd_diff_BICL = sd(diff_BICL))
|
||||
|
||||
ggplot(current_data$results) +
|
||||
aes(x = divergence, y = mean_diff_BICL, group = factor(Current_M), col = factor(Current_M)) +
|
||||
geom_point() +
|
||||
geom_line() +
|
||||
geom_ribbon(
|
||||
aes(
|
||||
ymin = mean_diff_BICL - sd_diff_BICL,
|
||||
ymax = mean_diff_BICL + sd_diff_BICL,
|
||||
fill = factor(Current_M),
|
||||
col = NULL
|
||||
),
|
||||
alpha = .25
|
||||
) +
|
||||
geom_hline(yintercept = 0)
|
||||
115
Rcodes/simulation/compare_parallelization_levels.R
Normal file
115
Rcodes/simulation/compare_parallelization_levels.R
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
# Sourcing all necessary files
|
||||
require("sbm", quietly = T)
|
||||
require("dplyr", quietly = T)
|
||||
require("tictoc", quietly = T)
|
||||
require("ggplot2", quietly = T)
|
||||
|
||||
devtools::load_all(path = "R/")
|
||||
|
||||
eps <- 0.05
|
||||
M <- 2
|
||||
nr <- 100
|
||||
nc <- 250
|
||||
|
||||
pir1 <- c(0.2, 0, 0.8)
|
||||
pir2 <- c(0.4, 0.6, 0)
|
||||
|
||||
pic1 <- c(0.6, 0, 0.4)
|
||||
pic2 <- c(0.4, 0.6, 0)
|
||||
|
||||
Q <- c(length(pir1), length(pic1))
|
||||
|
||||
# Make a non common alpha structure
|
||||
|
||||
alpha <- matrix(
|
||||
c( # 12 2 1
|
||||
0.4, eps, eps, # 12
|
||||
eps, 0.5, 0, # 2
|
||||
eps, 0, 0.2 # 1
|
||||
),
|
||||
nrow = Q[1], ncol = Q[2], byrow = TRUE
|
||||
)
|
||||
|
||||
bipartite_collection <- list(
|
||||
generate_bipartite_network(nr, nc, pir1, pic1, alpha),
|
||||
generate_bipartite_network(nr, nc, pir2, pic2, alpha)
|
||||
)
|
||||
|
||||
# This is a list of the M incidence matrices
|
||||
bipartite_collection_incidence <- lapply(seq.int(M), function(m) {
|
||||
bipartite_collection[[m]]$incidence_matrix
|
||||
})
|
||||
|
||||
Z <- lapply(seq.int(M), function(m) {
|
||||
list(
|
||||
bipartite_collection[[m]]$row_clustering,
|
||||
bipartite_collection[[m]]$col_clustering
|
||||
)
|
||||
})
|
||||
|
||||
results <- data.frame(matrix(nrow = 0, ncol = 6))
|
||||
names(results) <- c(
|
||||
"repetition",
|
||||
"user_func_parallel",
|
||||
"exploration_parallel",
|
||||
"subexploration_parallel",
|
||||
"ari_sum",
|
||||
"time"
|
||||
)
|
||||
|
||||
repetition_number <- 3
|
||||
|
||||
conditions <- expand.grid(
|
||||
seq.int(3), c(TRUE),
|
||||
c(TRUE, FALSE), c(TRUE, FALSE)
|
||||
)
|
||||
|
||||
for (i in seq_len(nrow(conditions))) {
|
||||
cat(
|
||||
"\nCondition ", i, "/", nrow(conditions), "\n",
|
||||
"repetition:", conditions[i, 1],
|
||||
" -- user_func_parallel:", conditions[i, 2],
|
||||
"-- exploration_parallel:", conditions[i, 3],
|
||||
"-- subexploration_parallel:", conditions[i, 4],
|
||||
"\n"
|
||||
)
|
||||
t0 <- Sys.time()
|
||||
mybisbmpop <- estimate_colBiSBM(
|
||||
netlist = bipartite_collection_incidence,
|
||||
colsbm_model = "pirho",
|
||||
global_opts = list(
|
||||
nb_cores = parallel::detectCores() - 1,
|
||||
verbosity = 0,
|
||||
parallelization_vector = unlist(conditions[i,c(2,3,4)])
|
||||
)
|
||||
)
|
||||
t1 <- Sys.time()
|
||||
ari_sums <- sum(sapply(
|
||||
seq_along(mybisbmpop$best_fit$Z),
|
||||
function(m) {
|
||||
sum(
|
||||
aricode::ARI(
|
||||
Z[[m]][[1]],
|
||||
mybisbmpop$best_fit$Z[[m]][[1]]
|
||||
),
|
||||
aricode::ARI(
|
||||
Z[[m]][[2]],
|
||||
mybisbmpop$best_fit$Z[[m]][[2]]
|
||||
)
|
||||
)
|
||||
}
|
||||
))
|
||||
|
||||
results[nrow(results) + 1, ] <- c(
|
||||
conditions[i, 1],
|
||||
conditions[i, 2],
|
||||
conditions[i, 3],
|
||||
conditions[i, 4],
|
||||
ari_sums,
|
||||
t1 - t0
|
||||
)
|
||||
}
|
||||
|
||||
saveRDS(results,
|
||||
file = paste0("./simulation/data/parallel_levels_", Sys.time(), ".Rds")
|
||||
)
|
||||
BIN
Rcodes/simulation/data/NA_robustness_results-12-06-23_21-03.Rds
Normal file
BIN
Rcodes/simulation/data/NA_robustness_results-12-06-23_21-03.Rds
Normal file
Binary file not shown.
BIN
Rcodes/simulation/data/NA_robustness_results-13-06-23_16-07.Rds
Normal file
BIN
Rcodes/simulation/data/NA_robustness_results-13-06-23_16-07.Rds
Normal file
Binary file not shown.
BIN
Rcodes/simulation/data/NA_robustness_results-14-06-23_15-03.Rds
Normal file
BIN
Rcodes/simulation/data/NA_robustness_results-14-06-23_15-03.Rds
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue