First commit
This commit is contained in:
commit
753179f341
420 changed files with 101172 additions and 0 deletions
301
.gitignore
vendored
Normal file
301
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,301 @@
|
|||
## Core latex/pdflatex auxiliary files:
|
||||
*.aux
|
||||
*.lof
|
||||
*.log
|
||||
*.lot
|
||||
*.fls
|
||||
*.out
|
||||
*.toc
|
||||
*.fmt
|
||||
*.fot
|
||||
*.cb
|
||||
*.cb2
|
||||
.*.lb
|
||||
|
||||
## Intermediate documents:
|
||||
*.dvi
|
||||
*.xdv
|
||||
*-converted-to.*
|
||||
# these rules might exclude image files for figures etc.
|
||||
# *.ps
|
||||
# *.eps
|
||||
# *.pdf
|
||||
|
||||
## Generated if empty string is given at "Please type another file name for output:"
|
||||
.pdf
|
||||
|
||||
## Bibliography auxiliary files (bibtex/biblatex/biber):
|
||||
*.bbl
|
||||
*.bcf
|
||||
*.blg
|
||||
*-blx.aux
|
||||
*-blx.bib
|
||||
*.run.xml
|
||||
|
||||
## Build tool auxiliary files:
|
||||
*.fdb_latexmk
|
||||
*.synctex
|
||||
*.synctex(busy)
|
||||
*.synctex.gz
|
||||
*.synctex.gz(busy)
|
||||
*.pdfsync
|
||||
|
||||
## Build tool directories for auxiliary files
|
||||
# latexrun
|
||||
latex.out/
|
||||
|
||||
## Auxiliary and intermediate files from other packages:
|
||||
# algorithms
|
||||
*.alg
|
||||
*.loa
|
||||
|
||||
# achemso
|
||||
acs-*.bib
|
||||
|
||||
# amsthm
|
||||
*.thm
|
||||
|
||||
# beamer
|
||||
*.nav
|
||||
*.pre
|
||||
*.snm
|
||||
*.vrb
|
||||
|
||||
# changes
|
||||
*.soc
|
||||
|
||||
# comment
|
||||
*.cut
|
||||
|
||||
# cprotect
|
||||
*.cpt
|
||||
|
||||
# elsarticle (documentclass of Elsevier journals)
|
||||
*.spl
|
||||
|
||||
# endnotes
|
||||
*.ent
|
||||
|
||||
# fixme
|
||||
*.lox
|
||||
|
||||
# feynmf/feynmp
|
||||
*.mf
|
||||
*.mp
|
||||
*.t[1-9]
|
||||
*.t[1-9][0-9]
|
||||
*.tfm
|
||||
|
||||
#(r)(e)ledmac/(r)(e)ledpar
|
||||
*.end
|
||||
*.?end
|
||||
*.[1-9]
|
||||
*.[1-9][0-9]
|
||||
*.[1-9][0-9][0-9]
|
||||
*.[1-9]R
|
||||
*.[1-9][0-9]R
|
||||
*.[1-9][0-9][0-9]R
|
||||
*.eledsec[1-9]
|
||||
*.eledsec[1-9]R
|
||||
*.eledsec[1-9][0-9]
|
||||
*.eledsec[1-9][0-9]R
|
||||
*.eledsec[1-9][0-9][0-9]
|
||||
*.eledsec[1-9][0-9][0-9]R
|
||||
|
||||
# glossaries
|
||||
*.acn
|
||||
*.acr
|
||||
*.glg
|
||||
*.glo
|
||||
*.gls
|
||||
*.glsdefs
|
||||
*.lzo
|
||||
*.lzs
|
||||
*.slg
|
||||
*.slo
|
||||
*.sls
|
||||
|
||||
# uncomment this for glossaries-extra (will ignore makeindex's style files!)
|
||||
# *.ist
|
||||
|
||||
# gnuplot
|
||||
*.gnuplot
|
||||
*.table
|
||||
|
||||
# gnuplottex
|
||||
*-gnuplottex-*
|
||||
|
||||
# gregoriotex
|
||||
*.gaux
|
||||
*.glog
|
||||
*.gtex
|
||||
|
||||
# htlatex
|
||||
*.4ct
|
||||
*.4tc
|
||||
*.idv
|
||||
*.lg
|
||||
*.trc
|
||||
*.xref
|
||||
|
||||
# hyperref
|
||||
*.brf
|
||||
|
||||
# knitr
|
||||
*-concordance.tex
|
||||
# TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files
|
||||
# *.tikz
|
||||
*-tikzDictionary
|
||||
|
||||
# listings
|
||||
*.lol
|
||||
|
||||
# luatexja-ruby
|
||||
*.ltjruby
|
||||
|
||||
# makeidx
|
||||
*.idx
|
||||
*.ilg
|
||||
*.ind
|
||||
|
||||
# minitoc
|
||||
*.maf
|
||||
*.mlf
|
||||
*.mlt
|
||||
*.mtc[0-9]*
|
||||
*.slf[0-9]*
|
||||
*.slt[0-9]*
|
||||
*.stc[0-9]*
|
||||
|
||||
# minted
|
||||
_minted*
|
||||
*.pyg
|
||||
|
||||
# morewrites
|
||||
*.mw
|
||||
|
||||
# newpax
|
||||
*.newpax
|
||||
|
||||
# nomencl
|
||||
*.nlg
|
||||
*.nlo
|
||||
*.nls
|
||||
|
||||
# pax
|
||||
*.pax
|
||||
|
||||
# pdfpcnotes
|
||||
*.pdfpc
|
||||
|
||||
# sagetex
|
||||
*.sagetex.sage
|
||||
*.sagetex.py
|
||||
*.sagetex.scmd
|
||||
|
||||
# scrwfile
|
||||
*.wrt
|
||||
|
||||
# svg
|
||||
svg-inkscape/
|
||||
|
||||
# sympy
|
||||
*.sout
|
||||
*.sympy
|
||||
sympy-plots-for-*.tex/
|
||||
|
||||
# pdfcomment
|
||||
*.upa
|
||||
*.upb
|
||||
|
||||
# pythontex
|
||||
*.pytxcode
|
||||
pythontex-files-*/
|
||||
|
||||
# tcolorbox
|
||||
*.listing
|
||||
|
||||
# thmtools
|
||||
*.loe
|
||||
|
||||
# TikZ & PGF
|
||||
*.dpth
|
||||
*.md5
|
||||
*.auxlock
|
||||
|
||||
# titletoc
|
||||
*.ptc
|
||||
|
||||
# todonotes
|
||||
*.tdo
|
||||
|
||||
# vhistory
|
||||
*.hst
|
||||
*.ver
|
||||
|
||||
# easy-todo
|
||||
*.lod
|
||||
|
||||
# xcolor
|
||||
*.xcp
|
||||
|
||||
# xmpincl
|
||||
*.xmpi
|
||||
|
||||
# xindy
|
||||
*.xdy
|
||||
|
||||
# xypic precompiled matrices and outlines
|
||||
*.xyc
|
||||
*.xyd
|
||||
|
||||
# endfloat
|
||||
*.ttt
|
||||
*.fff
|
||||
|
||||
# Latexian
|
||||
TSWLatexianTemp*
|
||||
|
||||
## Editors:
|
||||
# WinEdt
|
||||
*.bak
|
||||
*.sav
|
||||
|
||||
# Texpad
|
||||
.texpadtmp
|
||||
|
||||
# LyX
|
||||
*.lyx~
|
||||
|
||||
# Kile
|
||||
*.backup
|
||||
|
||||
# gummi
|
||||
.*.swp
|
||||
|
||||
# KBibTeX
|
||||
*~[0-9]*
|
||||
|
||||
# TeXnicCenter
|
||||
*.tps
|
||||
|
||||
# auto folder when using emacs and auctex
|
||||
./auto/*
|
||||
*.el
|
||||
|
||||
# expex forward references with \gathertags
|
||||
*-tags.tex
|
||||
|
||||
# standalone packages
|
||||
*.sta
|
||||
|
||||
# Makeindex log files
|
||||
*.lpz
|
||||
|
||||
# xwatermark package
|
||||
*.xwm
|
||||
|
||||
# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
|
||||
# option is specified. Footnotes are the stored in a file with suffix Notes.bib.
|
||||
# Uncomment the next line to have this generated file ignored.
|
||||
#*Notes.bib
|
||||
27
.pre-commit-config.yaml
Normal file
27
.pre-commit-config.yaml
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
repos:
|
||||
- repo: https://github.com/jonasbb/pre-commit-latex-hooks
|
||||
rev: v1.4.0
|
||||
hooks:
|
||||
- id: american-eg-ie
|
||||
- id: cleveref-capitalization
|
||||
- id: consistent-spelling
|
||||
args:
|
||||
[
|
||||
"--emph=et al.",
|
||||
"--emph=a priori",
|
||||
"--emph=a posteriori",
|
||||
'--regex=naive=\bna(i|\\"i)ve',
|
||||
]
|
||||
#- id: csquotes
|
||||
# - id: ensure-labels-for-sections
|
||||
- id: no-space-in-cite
|
||||
# - id: tilde-cite
|
||||
# - id: unique-labels
|
||||
- id: cleveref-instead-of-autoref
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v3.3.0
|
||||
hooks:
|
||||
- id: check-merge-conflict
|
||||
- id: check-yaml
|
||||
- id: trailing-whitespace
|
||||
files: ".*\\.(?:tex|py)$"
|
||||
142
.vscode/settings.json
vendored
Normal file
142
.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
{
|
||||
"emeraldwalk.runonsave": {
|
||||
"commands": [
|
||||
{
|
||||
"match": ".*\\.Rmd",
|
||||
"isAsync": true,
|
||||
"cmd": "/bin/bash -c \"Rscript Rmd2Latex-fragment.R '${file}'\""
|
||||
},
|
||||
{
|
||||
"match": ".*",
|
||||
"isAsync": true,
|
||||
"cmd": "echo ${file} "
|
||||
}
|
||||
]
|
||||
},
|
||||
"latex-workshop.latex.tools": [
|
||||
{
|
||||
"name": "latexmk",
|
||||
"command": "latexmk",
|
||||
"args": [
|
||||
"-shell-escape",
|
||||
"-synctex=1",
|
||||
"-interaction=nonstopmode",
|
||||
"-file-line-error",
|
||||
"-pdf",
|
||||
"-outdir=%OUTDIR%",
|
||||
"%DOC%"
|
||||
],
|
||||
"env": {}
|
||||
},
|
||||
{
|
||||
"name": "lualatexmk",
|
||||
"command": "latexmk",
|
||||
"args": [
|
||||
"-synctex=1",
|
||||
"-interaction=nonstopmode",
|
||||
"-file-line-error",
|
||||
"-lualatex",
|
||||
"-outdir=%OUTDIR%",
|
||||
"%DOC%"
|
||||
],
|
||||
"env": {}
|
||||
},
|
||||
{
|
||||
"name": "xelatexmk",
|
||||
"command": "latexmk",
|
||||
"args": [
|
||||
"-synctex=1",
|
||||
"-interaction=nonstopmode",
|
||||
"-file-line-error",
|
||||
"-xelatex",
|
||||
"-outdir=%OUTDIR%",
|
||||
"%DOC%"
|
||||
],
|
||||
"env": {}
|
||||
},
|
||||
{
|
||||
"name": "latexmk_rconly",
|
||||
"command": "latexmk",
|
||||
"args": [
|
||||
"%DOC%"
|
||||
],
|
||||
"env": {}
|
||||
},
|
||||
{
|
||||
"name": "pdflatex",
|
||||
"command": "pdflatex",
|
||||
"args": [
|
||||
"-synctex=1",
|
||||
"-interaction=nonstopmode",
|
||||
"-file-line-error",
|
||||
"%DOC%"
|
||||
],
|
||||
"env": {}
|
||||
},
|
||||
{
|
||||
"name": "bibtex",
|
||||
"command": "bibtex",
|
||||
"args": [
|
||||
"%DOCFILE%"
|
||||
],
|
||||
"env": {}
|
||||
},
|
||||
{
|
||||
"name": "rnw2tex",
|
||||
"command": "Rscript",
|
||||
"args": [
|
||||
"-e",
|
||||
"knitr::opts_knit$set(concordance = TRUE); knitr::knit('%DOCFILE_EXT%')"
|
||||
],
|
||||
"env": {}
|
||||
},
|
||||
{
|
||||
"name": "jnw2tex",
|
||||
"command": "julia",
|
||||
"args": [
|
||||
"-e",
|
||||
"using Weave; weave(\"%DOC_EXT%\", doctype=\"tex\")"
|
||||
],
|
||||
"env": {}
|
||||
},
|
||||
{
|
||||
"name": "jnw2texminted",
|
||||
"command": "julia",
|
||||
"args": [
|
||||
"-e",
|
||||
"using Weave; weave(\"%DOC_EXT%\", doctype=\"texminted\")"
|
||||
],
|
||||
"env": {}
|
||||
},
|
||||
{
|
||||
"name": "pnw2tex",
|
||||
"command": "pweave",
|
||||
"args": [
|
||||
"-f",
|
||||
"tex",
|
||||
"%DOC_EXT%"
|
||||
],
|
||||
"env": {}
|
||||
},
|
||||
{
|
||||
"name": "pnw2texminted",
|
||||
"command": "pweave",
|
||||
"args": [
|
||||
"-f",
|
||||
"texminted",
|
||||
"%DOC_EXT%"
|
||||
],
|
||||
"env": {}
|
||||
},
|
||||
{
|
||||
"name": "tectonic",
|
||||
"command": "tectonic",
|
||||
"args": [
|
||||
"--synctex",
|
||||
"--keep-logs",
|
||||
"%DOC%.tex"
|
||||
],
|
||||
"env": {}
|
||||
}
|
||||
]
|
||||
}
|
||||
5
README.md
Normal file
5
README.md
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Report for my 6 months intership at MIA Paris-Saclay Lab
|
||||
|
||||
I summarized in this report and presentation the work I performed at MIA Paris-Saclay Lab.
|
||||
|
||||
The following report may contain inaccurate observations, or wrong interpretations. If you notice such errors, don't hesitate to report them to me.
|
||||
287
Rcodes/real_data/CoOPLBM_completion_analyze.Rmd
Normal file
287
Rcodes/real_data/CoOPLBM_completion_analyze.Rmd
Normal file
|
|
@ -0,0 +1,287 @@
|
|||
---
|
||||
output:
|
||||
md_document:
|
||||
citation_package: biblatex
|
||||
---
|
||||
\subsection{Completing raw data using CoOPLBM~\parencite{anakokDisentanglingStructureEcological2022}}
|
||||
```{r, setup, include=FALSE, warning=FALSE}
|
||||
knitr::opts_chunk$set(echo = FALSE, dpi = 300)
|
||||
```
|
||||
|
||||
```{r libraries, echo = FALSE, include=FALSE}
|
||||
require("colSBM")
|
||||
require("aricode")
|
||||
require("ggplot2")
|
||||
```
|
||||
|
||||
```{r useful_functions, echo = FALSE}
|
||||
|
||||
if (getwd() == "/home/polarolouis/Nextcloud/Documents/APT/CEI/Stage Recherche Mathématiques/Depuis PC Portable/Stage MIA 2023/rapport-MIA-2023") {
|
||||
path_to_add <- "Rcodes/real_data/"
|
||||
} else {
|
||||
path_to_add <- ""
|
||||
}
|
||||
|
||||
extract_unlist <- function(data) {
|
||||
readRDS(data) |>
|
||||
extract_best_bipartite_partition() |>
|
||||
unlist()
|
||||
}
|
||||
|
||||
show_groups <- function(collection_list) {
|
||||
lapply(collection_list, function(collection) {
|
||||
collection$Q
|
||||
})
|
||||
}
|
||||
|
||||
partition_BICL <- function(collection_list) {
|
||||
sum(sapply(collection_list, function(collection) {
|
||||
collection$BICL
|
||||
}))
|
||||
}
|
||||
|
||||
extract_collection_clustering_id <- function(collection_list) {
|
||||
out_dataframe <- setNames(data.frame(matrix(nrow = 0, ncol = 2)), c("netid", "collection_id"))
|
||||
for (col_idx in seq_along(collection_list)) {
|
||||
for (net_id in collection_list[[col_idx]]$net_id) {
|
||||
out_dataframe[nrow(out_dataframe) + 1, ] <- c(net_id, col_idx)
|
||||
}
|
||||
}
|
||||
out_dataframe
|
||||
}
|
||||
|
||||
extract_full_models <- function(model_collections_list) {
|
||||
return(do.call(
|
||||
"rbind",
|
||||
lapply(
|
||||
seq_along(model_collections_list),
|
||||
function(idx) {
|
||||
model_name <- names(model_collections_list)[idx]
|
||||
df <- extract_collection_clustering_id(model_collections_list[[idx]])
|
||||
return(cbind(df, model = rep(model_name, length(df$netid))))
|
||||
}
|
||||
)
|
||||
))
|
||||
}
|
||||
|
||||
reorder_match_netids <- function(dataframe, target) {
|
||||
df <- dataframe[match(target, dataframe$netid), ]
|
||||
return(df)
|
||||
}
|
||||
|
||||
extract_full_reorder <- function(model_collections_list, target) {
|
||||
unordered <- extract_full_models(model_collections_list)
|
||||
return(do.call("rbind", lapply(
|
||||
names(model_collections_list),
|
||||
function(model_name) {
|
||||
return(reorder_match_netids(unordered[which(unordered$model == model_name), ], target))
|
||||
}
|
||||
)))
|
||||
}
|
||||
```
|
||||
|
||||
```{r data_importation, echo = FALSE}
|
||||
# Uncompleted
|
||||
uncompleted_model_list <- list(
|
||||
"iid" = extract_unlist(paste0(path_to_add, "data/dore_uncompleted_collection_clustering_nb_run_1_iid_70_networks_08-06-23-16:31:17.Rds")),
|
||||
"pi" = extract_unlist(paste0(path_to_add, "data/dore_uncompleted_collection_clustering_nb_run_1_pi_70_networks_08-06-23-16:52:16.Rds")),
|
||||
"rho" = extract_unlist(paste0(path_to_add, "data/dore_uncompleted_collection_clustering_nb_run_1_rho_70_networks_08-06-23-16:49:58.Rds")),
|
||||
"pirho" = extract_unlist(paste0(path_to_add, "data/dore_uncompleted_collection_clustering_nb_run_1_pirho_70_networks_08-06-23-16:41:33.Rds"))
|
||||
)
|
||||
|
||||
# Below we will need to have the netid in the same order so we choose to use the
|
||||
# uncompleted iid model order as reference
|
||||
netid_order <- extract_collection_clustering_id(uncompleted_model_list$iid)$netid
|
||||
model_order <- c("iid", "pi", "rho", "pirho")
|
||||
|
||||
uncompleted_clusterings <- extract_full_reorder(uncompleted_model_list, netid_order)
|
||||
|
||||
# 0.2 threshold
|
||||
point_2_model_list <- list(
|
||||
"iid" = extract_unlist(paste0(path_to_add, "data/dore_point_2_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-18:40:10.Rds")),
|
||||
"pi" = extract_unlist(paste0(path_to_add, "data/dore_point_2_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-19:22:19.Rds")),
|
||||
"rho" = extract_unlist(paste0(path_to_add, "data/dore_point_2_completed_collection_clustering_nb_run_1_rho_70_networks_07-06-23-20:03:53.Rds")),
|
||||
"pirho" = extract_unlist(paste0(path_to_add, "data/dore_point_2_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-21:09:12.Rds"))
|
||||
)
|
||||
point_2_clusterings <- extract_full_reorder(point_2_model_list, netid_order)
|
||||
|
||||
# 0.5 threshold
|
||||
point_5_model_list <- list(
|
||||
"iid" = extract_unlist(paste0(path_to_add, "data/dore_point_5_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-19:19:53.Rds")),
|
||||
"pi" = extract_unlist(paste0(path_to_add, "data/dore_point_5_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-21:31:20.Rds")),
|
||||
"rho" = extract_unlist(paste0(path_to_add, "data/dore_point_5_completed_collection_clustering_nb_run_1_rho_70_networks_07-06-23-21:03:50.Rds")),
|
||||
"pirho" = extract_unlist(paste0(path_to_add, "data/dore_point_5_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-21:13:10.Rds"))
|
||||
)
|
||||
point_5_clusterings <- extract_full_reorder(point_5_model_list, netid_order)
|
||||
|
||||
# Uniform re-sampled
|
||||
random_model_list <- list(
|
||||
"iid" = extract_unlist(paste0(path_to_add, "data/dore_random_completed_collection_clustering_nb_run_1_iid_70_networks_07-06-23-21:44:14.Rds")),
|
||||
"pi" = extract_unlist(paste0(path_to_add, "data/dore_random_completed_collection_clustering_nb_run_1_pi_70_networks_07-06-23-22:52:47.Rds")),
|
||||
"rho" = extract_unlist(paste0(path_to_add, "data/dore_random_completed_collection_clustering_nb_run_1_rho_70_networks_08-06-23-18:16:04.Rds")),
|
||||
"pirho" = extract_unlist(paste0(path_to_add, "data/dore_random_completed_collection_clustering_nb_run_1_pirho_70_networks_07-06-23-23:07:08.Rds"))
|
||||
)
|
||||
random_clusterings <- extract_full_reorder(random_model_list, netid_order)
|
||||
```
|
||||
|
||||
### Context of this analysis
|
||||
|
||||
After performing a netclustering on the raw data, we will see if the detect
|
||||
structure resulting in the clustering comes from the sampling effort. To test
|
||||
this we will use the CoOPLBM model by~\cite{anakokDisentanglingStructureEcological2022} to complete the data.
|
||||
|
||||
\emph{Note:}~\cite{anakokDisentanglingStructureEcological2022} provided data
|
||||
for the networks for which the method was applicable, this explains that
|
||||
there are fewer networks in the collections.
|
||||
|
||||
The CoOPLBM model assumes that the observed incidence matrix $R$ is an
|
||||
element-wise product of an $M$ matrix following an LBM and an $N$ matrix which
|
||||
elements follow Poisson distributions independent on $M$.
|
||||
|
||||
The model gives us the $\widehat{M}$ matrix, the elements of which are:
|
||||
|
||||
$$\widehat{M_{ij}} = \mathbb{P}(M_{ij} = 1)$$
|
||||
|
||||
Note that if $R_{ij} = 1$ then $\widehat{M_{ij}} = 1$
|
||||
|
||||
- 1 if the interaction was observed
|
||||
- a probability, that there should be an interaction but it wasn't observed
|
||||
|
||||
This *completed matrix* can be used in different manners to be fed to the colSBM
|
||||
model.
|
||||
|
||||
### Threshold based completions
|
||||
With the thresholds, the infered incidence matrix obtained by
|
||||
CoOPLBM is used to generate a completed incidence matrix by the following
|
||||
procedure :
|
||||
$$X_{ij} = \begin{cases}
|
||||
1 & \text{if the value is over the threshold} \\
|
||||
0 & \text{else} \\
|
||||
\end{cases}$$
|
||||
|
||||
#### 0.5 completed threshold
|
||||
```{r useful-functions, echo = FALSE, include=FALSE}
|
||||
ARI_netclustering_models <- function(
|
||||
clustering_compare,
|
||||
uncompleted_clustering = uncompleted_clustering,
|
||||
models = c("iid", "pi", "rho", "pirho"),
|
||||
models_names = c("$iid\\text{-}colSBM$", "$\\pi\\text{-}colSBM$", "$\\rho\\text{-}colSBM$", "$\\pi\\rho\\text{-}colSBM$")) {
|
||||
out <- sapply(models, function(model) {
|
||||
ARI(
|
||||
uncompleted_clusterings[
|
||||
which(uncompleted_clusterings$model == model),
|
||||
]$collection_id,
|
||||
clustering_compare[
|
||||
which(clustering_compare$model == model),
|
||||
]$collection_id
|
||||
)
|
||||
})
|
||||
names(out) <- models_names
|
||||
out
|
||||
}
|
||||
```
|
||||
|
||||
Here, the completion threshold is set to $0.5$.
|
||||
|
||||
First we will compute an ARI on the collection id given by the raw data and the
|
||||
completed matrix.
|
||||
|
||||
```{r 0.5_ARI, echo = FALSE}
|
||||
knitr::kable(ARI_netclustering_models(point_5_clusterings),
|
||||
col.names = c("ARI with uncompleted data"),
|
||||
escape = FALSE,
|
||||
booktabs = TRUE,
|
||||
digits = 2,
|
||||
position = "h!",
|
||||
caption = "\\label{tab:ari-table-0-5-completed} Table of ARI between 0.5 completed data and uncompleted data"
|
||||
)
|
||||
```
|
||||
|
||||
In the table \ref{tab:ari-table-0-5-completed}, one can see the network clustering obtained after applying
|
||||
CoOPLBM has not much in common with the clustering of the uncompleted data. Thus
|
||||
we can think that the completion changed significantly the interactions in
|
||||
the collections.
|
||||
|
||||
##### Number of sub-collections and details of each sub-collection
|
||||
```{r 0.5_partition_numbers, echo = FALSE}
|
||||
```
|
||||
|
||||
##### Supplementary information
|
||||
|
||||
```{r supinfo, echo = FALSE}
|
||||
supinfo <- readxl::read_xlsx(paste0(path_to_add, "data/supinfo.xlsx"), sheet = 2)
|
||||
interaction_data <- read.table(file = paste0(path_to_add, "data/interaction-data.txt"), sep = "\t", header = TRUE)
|
||||
|
||||
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
|
||||
incidence_matrices <- readRDS(file = paste0(path_to_add, "data/dore-matrices.Rds"))
|
||||
names_aggreg_networks <- names(incidence_matrices)
|
||||
get_vector_clustering_net <- function(unlisted_model) {
|
||||
vectorClusteringNet <- numeric(nrow(supinfo))
|
||||
for (k in 1:length(unlisted_model)) {
|
||||
idclust <- match(unlisted_model[[k]]$net_id, names_aggreg_networks)
|
||||
supinfoclust <- match(seq_ids_network_aggreg[idclust], supinfo$Idweb)
|
||||
vectorClusteringNet[supinfoclust] <- k
|
||||
}
|
||||
vectorClusteringNet
|
||||
# [! vectorClusteringNet %in% 0] # Filtering the network not present in uncompleted data
|
||||
}
|
||||
```
|
||||
|
||||
```{r boxplot-function, echo = FALSE}
|
||||
supinfo_boxplot <- function(supinfo_vector, parameter, pretty_name) {
|
||||
return(ggplot(supinfo) +
|
||||
aes(
|
||||
x = factor(supinfo_vector), y = parameter,
|
||||
fill = as.factor(supinfo_vector), group = as.factor(supinfo_vector)
|
||||
) +
|
||||
geom_boxplot() +
|
||||
theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
|
||||
labs(
|
||||
x = "Collection number", y = pretty_name,
|
||||
fill = "Collection number"
|
||||
))
|
||||
}
|
||||
```
|
||||
|
||||
```{r, supinfo_point_5}
|
||||
supinfo_point_5_iid <- get_vector_clustering_net(point_5_model_list[["iid"]])
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
### 0.2 completed threshold
|
||||
|
||||
The $0.2$ threshold adds a lot of interactions compared to raw matrix.
|
||||
|
||||
```{r 0.2_ARI, echo = FALSE, results="asis"}
|
||||
knitr::kable(ARI_netclustering_models(point_2_clusterings),
|
||||
col.names = c("ARI with uncompleted data"),
|
||||
escape = FALSE,
|
||||
booktabs = TRUE,
|
||||
digits = 2,
|
||||
position = "h!",
|
||||
caption = "\\label{tab:ari-table-0-2-completed} Table of ARI between 0.2 completed data and uncompleted data"
|
||||
)
|
||||
```
|
||||
|
||||
Same as for $0.5$, after applying CoOPLBM the obtained clustering doesn't match
|
||||
the uncompleted data.
|
||||
|
||||
### Sample based completions
|
||||
|
||||
The $M$ matrix is used to sample a new $X$ matrix which elements are the
|
||||
realisation of Bernoulli distributions of probability $M_{i,j}$.
|
||||
$$\mathbb{P}(X_{i,j} = 1) = M_{i,j} $$
|
||||
|
||||
```{r random_ARI, echo = FALSE, results="asis"}
|
||||
knitr::kable(ARI_netclustering_models(random_clusterings),
|
||||
col.names = c("ARI with uncompleted data"),
|
||||
escape = FALSE,
|
||||
booktabs = TRUE,
|
||||
digits = 2,
|
||||
position = "h!",
|
||||
caption = "\\label{tab:ari-table-random-completed} Table of ARI between
|
||||
randomly completed data and uncompleted data"
|
||||
)
|
||||
```
|
||||
135
Rcodes/real_data/CoOPLBM_completion_analyze.tex
Normal file
135
Rcodes/real_data/CoOPLBM_completion_analyze.tex
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
\subsection{Completing raw data using CoOPLBM~\parencite{anakokDisentanglingStructureEcological2022}}
|
||||
|
||||
\hypertarget{context-of-this-analysis}{%
|
||||
\subsubsection{Context of this
|
||||
analysis}\label{context-of-this-analysis}}
|
||||
|
||||
After performing a netclustering on the raw data, we will see if the
|
||||
detect structure resulting in the clustering comes from the sampling
|
||||
effort. To test this we will use the CoOPLBM model
|
||||
by\textasciitilde{}\cite{anakokDisentanglingStructureEcological2022} to
|
||||
complete the data.
|
||||
|
||||
\emph{Note:}\textasciitilde{}\cite{anakokDisentanglingStructureEcological2022}
|
||||
provided data for the networks for which the method was applicable, this
|
||||
explains that there are fewer networks in the collections.
|
||||
|
||||
The CoOPLBM model assumes that the observed incidence matrix \(R\) is an
|
||||
element-wise product of an \(M\) matrix following an LBM and an \(N\)
|
||||
matrix which elements follow Poisson distributions independent on \(M\).
|
||||
|
||||
The model gives us the \(\widehat{M}\) matrix, the elements of which
|
||||
are:
|
||||
|
||||
\[\widehat{M_{ij}} = \mathbb{P}(M_{ij} = 1)\]
|
||||
|
||||
Note that if \(R_{ij} = 1\) then \(\widehat{M_{ij}} = 1\)
|
||||
|
||||
\begin{itemize}
|
||||
\tightlist
|
||||
\item
|
||||
1 if the interaction was observed
|
||||
\item
|
||||
a probability, that there should be an interaction but it wasn't
|
||||
observed
|
||||
\end{itemize}
|
||||
|
||||
This \emph{completed matrix} can be used in different manners to be fed
|
||||
to the colSBM model.
|
||||
|
||||
\hypertarget{threshold-based-completions}{%
|
||||
\subsubsection{Threshold based
|
||||
completions}\label{threshold-based-completions}}
|
||||
|
||||
With the thresholds, the infered incidence matrix obtained by CoOPLBM is
|
||||
used to generate a completed incidence matrix by the following procedure
|
||||
: \[X_{ij} = \begin{cases}
|
||||
1 & \text{if the value is over the threshold} \\
|
||||
0 & \text{else} \\
|
||||
\end{cases}\]
|
||||
|
||||
\hypertarget{completed-threshold}{%
|
||||
\paragraph{0.5 completed threshold}\label{completed-threshold}}
|
||||
|
||||
Here, the completion threshold is set to \(0.5\).
|
||||
|
||||
First we will compute an ARI on the collection id given by the raw data
|
||||
and the completed matrix.
|
||||
|
||||
\begin{table}[h!]
|
||||
|
||||
\caption{\label{tab:0.5_ARI}\label{tab:ari-table-0-5-completed} Table of ARI between 0.5 completed data and uncompleted data}
|
||||
\centering
|
||||
\begin{tabular}[t]{lr}
|
||||
\toprule
|
||||
& ARI with uncompleted data\\
|
||||
\midrule
|
||||
$iid\text{-}colSBM$ & 0.11\\
|
||||
$\pi\text{-}colSBM$ & 0.03\\
|
||||
$\rho\text{-}colSBM$ & 0.09\\
|
||||
$\pi\rho\text{-}colSBM$ & 0.22\\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\end{table}
|
||||
|
||||
In the table \ref{tab:ari-table-0-5-completed}, one can see the network
|
||||
clustering obtained after applying CoOPLBM has not much in common with
|
||||
the clustering of the uncompleted data. Thus we can think that the
|
||||
completion changed significantly the interactions in the collections.
|
||||
|
||||
\hypertarget{number-of-sub-collections-and-details-of-each-sub-collection}{%
|
||||
\subparagraph{Number of sub-collections and details of each
|
||||
sub-collection}\label{number-of-sub-collections-and-details-of-each-sub-collection}}
|
||||
|
||||
\hypertarget{supplementary-information}{%
|
||||
\subparagraph{Supplementary
|
||||
information}\label{supplementary-information}}
|
||||
|
||||
\hypertarget{completed-threshold-1}{%
|
||||
\subsubsection{0.2 completed threshold}\label{completed-threshold-1}}
|
||||
|
||||
The \(0.2\) threshold adds a lot of interactions compared to raw matrix.
|
||||
|
||||
\begin{table}[h!]
|
||||
|
||||
\caption{\label{tab:0.2_ARI}\label{tab:ari-table-0-2-completed} Table of ARI between 0.2 completed data and uncompleted data}
|
||||
\centering
|
||||
\begin{tabular}[t]{lr}
|
||||
\toprule
|
||||
& ARI with uncompleted data\\
|
||||
\midrule
|
||||
$iid\text{-}colSBM$ & 0.04\\
|
||||
$\pi\text{-}colSBM$ & 0.03\\
|
||||
$\rho\text{-}colSBM$ & 0.02\\
|
||||
$\pi\rho\text{-}colSBM$ & 0.04\\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\end{table}
|
||||
|
||||
Same as for \(0.5\), after applying CoOPLBM the obtained clustering
|
||||
doesn't match the uncompleted data.
|
||||
|
||||
\hypertarget{sample-based-completions}{%
|
||||
\subsubsection{Sample based
|
||||
completions}\label{sample-based-completions}}
|
||||
|
||||
The \(M\) matrix is used to sample a new \(X\) matrix which elements are
|
||||
the realisation of Bernoulli distributions of probability \(M_{i,j}\).
|
||||
\[\mathbb{P}(X_{i,j} = 1) = M_{i,j} \]
|
||||
|
||||
\begin{table}[h!]
|
||||
|
||||
\caption{\label{tab:random_ARI}\label{tab:ari-table-random-completed} Table of ARI between
|
||||
randomly completed data and uncompleted data}
|
||||
\centering
|
||||
\begin{tabular}[t]{lr}
|
||||
\toprule
|
||||
& ARI with uncompleted data\\
|
||||
\midrule
|
||||
$iid\text{-}colSBM$ & 0.01\\
|
||||
$\pi\text{-}colSBM$ & 0.03\\
|
||||
$\rho\text{-}colSBM$ & 0.01\\
|
||||
$\pi\rho\text{-}colSBM$ & 0.02\\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\end{table}
|
||||
158
Rcodes/real_data/application_dore.Rmd
Normal file
158
Rcodes/real_data/application_dore.Rmd
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
# Application to \cite{doreRelativeEffectsAnthropogenic2021} data
|
||||
\label{sec:application-to-dorerelativeeffectsanthropogenic2021-data}
|
||||
|
||||
```{r, setup, include=FALSE, warning=FALSE}
|
||||
knitr::opts_chunk$set(echo = FALSE, dpi = 300)
|
||||
```
|
||||
|
||||
```{r}
|
||||
# import fix
|
||||
if (getwd() == "/home/polarolouis/Nextcloud/Documents/APT/CEI/Stage Recherche Mathématiques/Depuis PC Portable/Stage MIA 2023/rapport-MIA-2023") {
|
||||
path_to_add <- "Rcodes/real_data/"
|
||||
} else {
|
||||
path_to_add <- ""
|
||||
}
|
||||
```
|
||||
|
||||
```{r require_lib, echo = FALSE, include=FALSE, warning=FALSE}
|
||||
require("tidyverse")
|
||||
require("knitr")
|
||||
require("colSBM")
|
||||
require("ggplot2")
|
||||
require("patchwork")
|
||||
source(paste0(path_to_add, "temporary_plot.R"))
|
||||
```
|
||||
|
||||
```{r better_collection_extraction, echo = FALSE, warning=FALSE}
|
||||
extract_unlist_reorder <- function(clustering_data_path) {
|
||||
clustering <- readRDS(clustering_data_path)
|
||||
if (!is.list(clustering)) {
|
||||
clustering <- list(clustering)
|
||||
}
|
||||
best_partition <- extract_best_bipartite_partition(clustering)
|
||||
best_partition_unlist <- unlist(best_partition)
|
||||
if (!is.list(best_partition_unlist)) {
|
||||
best_partition_unlist <- list(best_partition_unlist)
|
||||
}
|
||||
size <- length(best_partition_unlist)
|
||||
return(setNames(lapply(best_partition_unlist, function(collection) {
|
||||
reorder_parameters(collection)
|
||||
}), paste(rep("Collection", size), seq_len(size))))
|
||||
}
|
||||
```
|
||||
|
||||
```{r load data, echo = FALSE, include = FALSE, warning=FALSE}
|
||||
# All results
|
||||
iid_unlist <- extract_unlist_reorder(paste0(path_to_add, "data/dore_collection_clustering_nb_run1_iid_123networks_24-05-23-21:40:42.Rds"))
|
||||
```
|
||||
|
||||
Here we apply the network clustering procedure (we refer to it as *netclustering*)
|
||||
to the data from \cite{doreRelativeEffectsAnthropogenic2021}. These data are
|
||||
plant-pollinator bipartite networks from differents areas and times.
|
||||
|
||||
In a second part we will use additional information for the networks to
|
||||
try to identify the impact and correlations with the observed structures.
|
||||
|
||||
## Netclustering with the $iid\text{-}colBiSBM$ model
|
||||
We obtained the more interpretable results with $iid\text{-}colBiSBM$ model.
|
||||
This resulted in `r length(iid_unlist)` collections to partition the $M = 123$
|
||||
networks.
|
||||
|
||||
```{r meso-plots, echo = FALSE, results='asis'}
|
||||
#| fig.cap=sapply(seq_along(iid_unlist), function(idx) paste0("Collection N°", idx)),
|
||||
##| fig.cap = "Structure of the collections in the partition and respective proportions of blocks",
|
||||
##| fig.subcap = sapply(seq_along(iid_unlist), function(idx) paste0("Collection N°", idx)),
|
||||
#| fig.asp = 0.5
|
||||
|
||||
|
||||
meso_print <- function(unlisted_partition) {
|
||||
for (idx in seq_along(unlisted_partition)) {
|
||||
print(plot(unlisted_partition[[idx]], type = "meso", mixture = TRUE) + ggtitle(paste("Collection ", idx)))
|
||||
cat("\\newline")
|
||||
}
|
||||
}
|
||||
meso_print(iid_unlist)
|
||||
|
||||
```
|
||||
|
||||
In all the obtained collections the structure is the classical nested structure.
|
||||
As this is a well-known structure for plant-pollinator data this tends to
|
||||
indicate that we are not going in a wrong direction.
|
||||
|
||||
The \nth{3} collection consists of only one network, indicating that for this
|
||||
model, the small76 network was really different of all the others.
|
||||
One reason might be that it's the oldest network and maybe the data collection
|
||||
protocol is different.
|
||||
|
||||
## Comparison with additional information
|
||||
|
||||
```{r supinfo, echo = FALSE}
|
||||
supinfo <- readxl::read_xlsx(paste0(path_to_add, "data/supinfo.xlsx"), sheet = 2)
|
||||
interaction_data <- read.table(file = paste0(path_to_add, "data/interaction-data.txt"), sep = "\t", header = TRUE)
|
||||
|
||||
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
|
||||
incidence_matrices <- readRDS(file = paste0(path_to_add, "data/dore-matrices.Rds"))
|
||||
names_aggreg_networks <- names(incidence_matrices)
|
||||
vectorClusteringNet <- numeric(nrow(supinfo))
|
||||
for (k in 1:length(iid_unlist)) {
|
||||
idclust <- match(iid_unlist[[k]]$net_id, names_aggreg_networks)
|
||||
supinfoclust <- match(seq_ids_network_aggreg[idclust], supinfo$Idweb)
|
||||
vectorClusteringNet[supinfoclust] <- k
|
||||
}
|
||||
```
|
||||
|
||||
Using supplementary information we obtain the following boxplots.
|
||||
|
||||
|
||||
```{r boxplot-function, echo = FALSE}
|
||||
supinfo_boxplot <- function(parameter, pretty_name) {
|
||||
return(ggplot(supinfo) +
|
||||
aes(
|
||||
x = vectorClusteringNet, y = parameter,
|
||||
fill = as.factor(vectorClusteringNet), group = as.factor(vectorClusteringNet)
|
||||
) +
|
||||
geom_boxplot() +
|
||||
labs(
|
||||
x = "Collection number", y = pretty_name,
|
||||
fill = "Collection number"
|
||||
))
|
||||
}
|
||||
```
|
||||
|
||||
```{r boxplots_annual_timespan, echo = FALSE}
|
||||
#| fig.cap = "\\label{fig:boxplot-annual-time-span}Boxplot of annual time span in function of the collection number"
|
||||
ggplot(supinfo) +
|
||||
aes(x = vectorClusteringNet, y = Annual_time_span,
|
||||
fill = as.factor(vectorClusteringNet), group = as.factor(vectorClusteringNet)) +
|
||||
geom_boxplot() +
|
||||
labs(
|
||||
x = "Collection number", y = "Annual time span",
|
||||
fill = "Collection number"
|
||||
)
|
||||
```
|
||||
|
||||
The annual time span is the number of days the sampling period lasted. So we can
|
||||
thus see in figure \ref{fig:boxplot-annual-time-span} that collections 1 and 4 were
|
||||
sampled for a larger period of time than collections 2 and 5.
|
||||
This could explain observed differences in the structure detected : the
|
||||
"checkerboard" appearance of the alpha matrices representations may represent
|
||||
interactions that only occurs at a given period of time.
|
||||
Thus the shorter time period doesn't capture such interactions.
|
||||
|
||||
```{r boxplot_rainfall, echo = FALSE}
|
||||
#| fig.cap = "\\label{fig:boxplot-total-rainfall}Boxplot of total rainfall in function of the collection number"
|
||||
supinfo_boxplot(supinfo$Tot_Rainfall_IPCC, "Total rainfall")
|
||||
```
|
||||
|
||||
There seems to be the same trend for the total rainfall.
|
||||
|
||||
```{r boxplot_sampling_effort, echo = FALSE}
|
||||
#| fig.cap = "\\label{fig:boxplot-sampling-effort}Boxplot of the sampling effort in function of the collection number"
|
||||
supinfo_boxplot(supinfo$Sampling_effort, "Sampling effort")
|
||||
```
|
||||
|
||||
The sampling effort seems to be quite higher for collection 5 and a little
|
||||
higher for collection 2. And collection 1 and 4 have the inverse trend. The
|
||||
separation between collections 1,4 and 2,5 seems to still hold. And the sampling
|
||||
effort is related to the sampling time that is why it's higher for the
|
||||
collections that were sampled for a shorter time period.
|
||||
80
Rcodes/real_data/application_dore.tex
Normal file
80
Rcodes/real_data/application_dore.tex
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
\hypertarget{application-to-data}{%
|
||||
\section{\texorpdfstring{Application to
|
||||
\cite{doreRelativeEffectsAnthropogenic2021}
|
||||
data}{Application to data}}\label{application-to-data}}
|
||||
|
||||
\label{sec:application-to-dorerelativeeffectsanthropogenic2021-data}
|
||||
|
||||
Here we apply the network clustering procedure (we refer to it as
|
||||
\emph{netclustering}) to the data from
|
||||
\cite{doreRelativeEffectsAnthropogenic2021}. These data are
|
||||
plant-pollinator bipartite networks from differents areas and times.
|
||||
|
||||
In a second part we will use additional information for the networks to
|
||||
try to identify the impact and correlations with the observed
|
||||
structures.
|
||||
|
||||
\hypertarget{netclustering-with-the-iidtext-colbisbm-model}{%
|
||||
\subsection{\texorpdfstring{Netclustering with the
|
||||
\(iid\text{-}colBiSBM\)
|
||||
model}{Netclustering with the iid\textbackslash text\{-\}colBiSBM model}}\label{netclustering-with-the-iidtext-colbisbm-model}}
|
||||
|
||||
We obtained the more interpretable results with \(iid\text{-}colBiSBM\)
|
||||
model. This resulted in 5 collections to partition the \(M = 123\)
|
||||
networks.
|
||||
|
||||
\includegraphics{./img/22d3409f045c956ffc0773e508871c61db4ad1e9.png}\newline\includegraphics{./img/2859d1c94af6539cced6aee6ee6bf6d49498518d.png}\newline\includegraphics{./img/037bcbcbc85f8a9562f98706ad7766c4099516ef.png}\newline\includegraphics{./img/f730f05cb60a7cdc837102601660f03edd767a60.png}\newline\includegraphics{./img/90d21c2459f68c2a6bc6cce93f9f1e10c3f0fef5.png}\newline
|
||||
|
||||
In all the obtained collections the structure is the classical nested
|
||||
structure. As this is a well-known structure for plant-pollinator data
|
||||
this tends to indicate that we are not going in a wrong direction.
|
||||
|
||||
The \nth{3} collection consists of only one network, indicating that for
|
||||
this model, the small76 network was really different of all the others.
|
||||
One reason might be that it's the oldest network and maybe the data
|
||||
collection protocol is different.
|
||||
|
||||
\hypertarget{comparison-with-additional-information}{%
|
||||
\subsection{Comparison with additional
|
||||
information}\label{comparison-with-additional-information}}
|
||||
|
||||
Using supplementary information we obtain the following boxplots.
|
||||
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics{./img/de77b630fb66744d3a3ed68e45be765532d1eb0f.png}
|
||||
\caption{\label{fig:boxplot-annual-time-span}Boxplot of annual time span
|
||||
in function of the collection number}
|
||||
\end{figure}
|
||||
|
||||
The annual time span is the number of days the sampling period lasted.
|
||||
So we can thus see in figure \ref{fig:boxplot-annual-time-span} that
|
||||
collections 1 and 4 were sampled for a larger period of time than
|
||||
collections 2 and 5. This could explain observed differences in the
|
||||
structure detected : the ``checkerboard'' appearance of the alpha
|
||||
matrices representations may represent interactions that only occurs at
|
||||
a given period of time. Thus the shorter time period doesn't capture
|
||||
such interactions.
|
||||
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics{./img/5bbc4b4b07c0e990a3ae2755165958ffbf517902.png}
|
||||
\caption{\label{fig:boxplot-total-rainfall}Boxplot of total rainfall in
|
||||
function of the collection number}
|
||||
\end{figure}
|
||||
|
||||
There seems to be the same trend for the total rainfall.
|
||||
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics{./img/c75a33aa046b6f1bbcff45268346c4ec39067917.png}
|
||||
\caption{\label{fig:boxplot-sampling-effort}Boxplot of the sampling
|
||||
effort in function of the collection number}
|
||||
\end{figure}
|
||||
|
||||
The sampling effort seems to be quite higher for collection 5 and a
|
||||
little higher for collection 2. And collection 1 and 4 have the inverse
|
||||
trend. The separation between collections 1,4 and 2,5 seems to still
|
||||
hold. And the sampling effort is related to the sampling time that is
|
||||
why it's higher for the collections that were sampled for a shorter time
|
||||
period.
|
||||
BIN
Rcodes/real_data/data/Data.rds
Executable file
BIN
Rcodes/real_data/data/Data.rds
Executable file
Binary file not shown.
36188
Rcodes/real_data/data/DataTotherbi_updateJune2023.csv
Normal file
36188
Rcodes/real_data/data/DataTotherbi_updateJune2023.csv
Normal file
File diff suppressed because it is too large
Load diff
BIN
Rcodes/real_data/data/completed0.2.rds
Executable file
BIN
Rcodes/real_data/data/completed0.2.rds
Executable file
Binary file not shown.
BIN
Rcodes/real_data/data/completed0.5.rds
Executable file
BIN
Rcodes/real_data/data/completed0.5.rds
Executable file
Binary file not shown.
BIN
Rcodes/real_data/data/completedrandom.rds
Executable file
BIN
Rcodes/real_data/data/completedrandom.rds
Executable file
Binary file not shown.
BIN
Rcodes/real_data/data/dore-matrices.Rds
Normal file
BIN
Rcodes/real_data/data/dore-matrices.Rds
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
55277
Rcodes/real_data/data/interaction-data.txt
Normal file
55277
Rcodes/real_data/data/interaction-data.txt
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,162 @@
|
|||
publi;web;latitude;longitude;locationtot;feedingguild1;feedingguild2;samplingyear
|
||||
adaime;adaime2017_A;2.5;-51.5;terra firme forest, floodable areas and savannas, Cal<61>oene municipality, Amapa State, Brazil;fruit eater;NoOrthoptera;2013
|
||||
adaime;adaime2017_B;3.5;-51.8;terra firme forest, floodable areas and savannas, Oiapoque municipality, Amapa State, Brazil;fruit eater;NoOrthoptera;2013
|
||||
Aldryhim;Aldryhim1985;24;-46; Saudi Arabia, Saudi Arabia;sap sucker;NoOrthoptera;1995
|
||||
basset;basset1996;-7.4;146.73;grasslands and forest patches, dominated by secondary forests, Mt Kaindi, Wau, Papua New Guinea;leaf chewing;NoOrthoptera;1993
|
||||
bergamini;bergamini2016;-17.6;-43.6;grasslands, Brazilian Cerrado savannas, Brazil;flower eater;NoOrthoptera;2003
|
||||
bluthgen;bluthgen2006;4.97;117.8;mature lowland evergreen dipterocarp forest , Danun Valley conservation area, Danum Valley, Sabah, Malaysia;leaf chewing;NoOrthoptera;2004
|
||||
bodner;bodner2009;-3.97;-79.98;pristine montane forest, Reserva biologica San Francisco, Zamora-Chinchipe, Ecuador ;leaf chewing;NoOrthoptera;2006
|
||||
Brehm;Brehm2003;-3.97;-79.08;primary disturbed montane rainforest and early successional stages after human disturbances, Estacion Cientifica San Francisco, Zamora Chinchipe, South Ecuador;mixed;NoOrthoptera;2000
|
||||
Brown;Brown2019;14;99.7;lowland seasonal evergreen rainforest, Khao Chong Botanical Garden, Thailand;mixed;NoOrthoptera;2015
|
||||
coley;coley2006;9;-80;moist tropical lowland forest, Barro Colorado Island, Panama;leaf chewing;NoOrthoptera;1999
|
||||
cuevas-reyes;cuevas-reyes;19.49;-104.99;tropical deciduous forest and patches of tropical riparian forest, Chamela-Cuixmala Biosphere reserve, Pacific coast of Jalisco, Mexico;gall maker;NoOrthoptera;2001
|
||||
dyer&gentry;dyer&gentry2002;10.43;-83.98;wetland, La Selva biological station, Costa Rica;leaf chewing;NoOrthoptera;2002
|
||||
garcia&robledo;garcia&robledo2013;10.43;-83.98;aseasonal tropical wet forest, La Sevla Biological Station, Costa Rica;leaf chewing;NoOrthoptera;2011
|
||||
Hackett2019;Hackett2019_HH;50.72;-1.75;ocean-adjacent peninsula consisting of a mosaic of habitats, Hengistbury Head, UK;mixed;NoOrthoptera;2013
|
||||
Hackett2019;Hackett2019_TP;-46.59;169.43;ocean-adjacent peninsula consisting of a mosaic of habitats, Tautuku Peninsula, New Zealand;mixed;NoOrthoptera;2015
|
||||
heleno;heleno2009;37.78;-25.22;native vegetation was cleared for pastures, replaced by production forest or taken over by weeds, Serra da Tronqueira, Sao Miguel, Azores archipelago, Portugal;mixed;NoOrthoptera;2006
|
||||
hemidi;hemidi2013;34.8;5.73; Biskra, Alg<6C>rie;sap sucker;NoOrthoptera;2011
|
||||
henneman&memmott;henneman&memmott1;22.09;-159.56;isolated from agricultural areas, Alakai Swamp, island of Kauai, Hawaii;leaf chewing;NoOrthoptera;2000
|
||||
henneman&memmott;henneman&memmott2;22.09;-159.56;wetland, Alakai Swamp, island of Kauai, Hawaii;leaf chewing;NoOrthoptera;2000
|
||||
Ibanez;Ibanez2013;45.03;6.4;subalpine grasslands, Vilar d'Ar<41>ne, central french alps;leaf chewing;Orthoptera;2011
|
||||
Idechil;Idechil2007;7.5;134.5; Republic of Palau, islands of Palau;sap sucker;NoOrthoptera;2004
|
||||
janzen;janzen1980;10.8;-85.7;deciduous forest with moister habitats , Santa Rosa National Park, Costa Rica;frugivore_seed predator;NoOrthoptera;1980
|
||||
janzen;janzen2003;10.83;-85.6;rainforest, Area de conservation Guanacaste, Costa Rica;leaf chewing;NoOrthoptera;2003
|
||||
joern;joern_altuda;30.4;-103.6;mountainous area with arid grassland plant species, Altuda and Marathon, near Alpine, Texas;leaf chewing;Orthoptera;1975
|
||||
joern;joern_marathon;30.4;-103.6;arid grassland, Alpine, Texas, USA;leaf chewing;Orthoptera;1975
|
||||
Joern1985;Joern1985;41.578544;-101.707547;Arapaho Prairie, upland Sand Hills grassland, Arthur County, Nebraska, USA;leaf chewing;Orthoptera;1985
|
||||
kim&choi;kim&choi2021_A;35.41;127.48;temperate forest, mixed deciduous forest, Sangseonam hornbeam tree forest, South Korea;leaf chewing;NoOrthoptera;2016
|
||||
kim&choi;kim&choi2021_B;35.37;127.57;temperate forest, mixed deciduous forest, Mt Jirisan, banseon oak tree forest, South Korea;leaf chewing;NoOrthoptera;2016
|
||||
Knuff;Knuff2019;47.998955;8.205064;managed forests, southern Black Forest, southwestern Germany;gall maker;NoOrthoptera;2017
|
||||
kollars;kollars2013;48.32;18.1;alluvial soil with high content of clay fraction, Botanical garden of Slovak University of Agriculture, Nitra, Solavakia;mixed;NoOrthoptera;2012
|
||||
Konig2022;Konig2022;47.592457;12.940233;open grassland sites on calcareous bedrock, National Park Berchtesgaden and Lower Franconia, Bavaria, Germany;leaf chewing;Orthoptera;2019
|
||||
lewis;lewis2002;17.07;-88.69;moist tropical forest, Chiquibul Forest reserve, Cayo district, Belize, Central America;leaf miner;NoOrthoptera;1998
|
||||
Macfadyen;Macfadyen_A1;51.4;-2.69;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_A10;51;-2.49;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_A2;51.55;-2.49;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_A3;51.51;-2.42;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_A4;51.52;-2.49;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_A5;51.51;-2.5;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_A6;51.52;-2.48;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_A7;51.32;-2.35;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_A8;51.36;-2.47;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_A9;51.32;-2.34;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B1;5.421563;-2.6801987;temperate grassland, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B2;51.291623;-2.6028854;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B3;51.29728;-2.4066504;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B4;51.126799;-2.3271997;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B5;51.069394;-2.4322512;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B6;51.614902;-2.3436984;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B7;51.694362;-2.1084028;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B8;51.638776;-2.1769368;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B9;51.685258;-2.0148078;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
Macfadyen;Macfadyen_B10;51.142195;-2.2890993;pasture fields, 10 organic farms in UK, south west England;mixed;NoOrthoptera;2006
|
||||
martins;martins2020;-17.6;-43.6;grasslands, Brazilian Cerrado , Brazil;flower eater;NoOrthoptera;2005
|
||||
Masetti;Masetti2004_A;44.16;12.22;near a 5 yr hedgerow, potato, wheat, alfalfa and wine grapes, Centro Ricerche produzioni Vegetali, Bologna Province, Italia;leaf miner;NoOrthoptera;1999
|
||||
Masetti;Masetti2004_B;45;12;near a 4 yr hedgerow, corn, wheat and beets, Bologna Province, Italia, Bologna Province, Italia;leaf miner;NoOrthoptera;1999
|
||||
Masetti;Masetti2004_C;45;12;near a 6 yr hederow and a ditch, Bologna Province, Italia, Bologna Province, Italia;leaf miner;NoOrthoptera;1999
|
||||
massa;massa2001_A;37.6;14; Sicily, Italy;leaf miner;NoOrthoptera;2000
|
||||
massa;massa2001_B;30;35; Al Bahhath, Aqaba and Dana Village, Jordan;leaf miner;NoOrthoptera;1999
|
||||
memmott;memmott1994;10.83;-85.7;tropical dry forest, Santa Rosa National Park, Guanacaste, Costa Rica;leaf miner;NoOrthoptera;1990
|
||||
muller;muller1999;51.4;-0.64;heavily grazed field, Silwood Park, Berkshire, England;sap sucker;NoOrthoptera;1995
|
||||
nakagawa;nakagawa_1;4.33;113.83;humult and udult soils, Lambir Hills National Park, Sarawak, Malaysia;seed predator;NoOrthoptera;1998
|
||||
nakagawa;nakagawa_2;4.33;113.83;tropical lowland forest, Lambir Hills National Park, Malaysia;seed predator;NoOrthoptera;1998
|
||||
novotny1;novotny2005;-5.23;145.4;primary and secondary forests, Madang province, Papua New Guinea;fruit eater;NoOrthoptera;2001
|
||||
novotny2;novotny2012;-5.23;145.4;tropical lowland forest, Madang province, Papua New Guinea;mixed;NoOrthoptera;2008
|
||||
pearson&altermatt;pearson&altermatt2013;48.54;9.04; Baden W<>rttemberg, Germany;leaf chewing;NoOrthoptera;2005
|
||||
Peralta;Peralta2017;-41.49228;173.022973;native Nothofagaceae forest and exotic pine plantations, native and exotic plantation forests, New Zealand;leaf chewing;NoOrthoptera;2011
|
||||
Pitteloud;Pitteloud2020_B1;46.25788;7.02235;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_B2;46.26914;7.03241;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_B3;46.27424;7.04833;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_B4;46.28629;7.0957;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_B5.1;46.28567;7.12451;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_B5.2;46.29149;7.11251;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_B6;46.26865;7.10755;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_B7;46.27683;7.15518;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_B8;46.26852;7.16232;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_C1;46.86999;9.5166;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_C2;46.87022;9.50919;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_C3;46.8743;9.5083;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_C4;46.87021;9.48978;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_C5;46.87841;9.49396;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_C6;46.88867;9.48989;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_C7;46.88946;9.48001;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_C8;46.89054;9.47624;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_F1;46.4732;8.81948;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_F2;46.4887;8.79271;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_F3;46.4951;8.78085;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_F4;46.5028;8.7816;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_F5;46.50577;8.7947;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_F6;46.50891;8.77677;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_F7;46.51244;8.77881;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_F8;46.51553;8.78595;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_G1;46.63493;7.90025;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_G2;46.63957;7.9796;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_G3;46.64264;7.97271;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_G4;46.64737;7.97086;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_G5;46.64949;7.9509;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_G6;46.65319;7.94267;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_G7;46.65756;7.99056;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_G8;46.66035;7.9805;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_M1;46.12583;7.08096;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_M2;46.06652;7.14868;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_M3;46.05299;7.15534;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_M4;46.01957;7.1621;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_M5;46.02743;7.17717;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_M6;46.03024;7.17648;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_M7;46.03573;7.17791;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_M8;46.03712;7.18249;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_S1;46.31328;7.58466;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_S2;46.32012;7.57929;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_S3;46.3251;7.5532;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_S4;46.33103;7.56003;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_S5;46.3353;7.53348;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_S6;46.34787;7.5386;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_S7;46.34687;7.52317;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
Pitteloud;Pitteloud2020_S8;46.35071;7.52571;alpine open grasslands, open grasslands in Swiss Alps, Switzerland;leaf chewing;Orthoptera;2016
|
||||
pocock;pocock2012_aphid;51.31;-2.32;mixed farm: 50% arable, 50% grass pasture or ley, Norwood Farm, Somerset, UK;mixed;NoOrthoptera;2008
|
||||
prado;prado2004;-19;-43;high plant diversity and endemism, Campos rupestres, Brazil;flower eater;NoOrthoptera;1996
|
||||
Saavedra;Saavedra2017_initial_A;19.575694;-105.035162;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
|
||||
Saavedra;Saavedra2017_initial_B;19.572095;-105.054989;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
|
||||
Saavedra;Saavedra2017_initial_C;19.416939;-104.895751;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
|
||||
Saavedra;Saavedra2017_middle_A;19.610383;-105.027566;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
|
||||
Saavedra;Saavedra2017_middle_B;19.578039;-105.040484;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
|
||||
Saavedra;Saavedra2017_middle_C;19.509043;-104.9256;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
|
||||
Saavedra;Saavedra2017_late_A;19.603551;-105.090781;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
|
||||
Saavedra;Saavedra2017_late_B;19.596921;-105.033532;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
|
||||
Saavedra;Saavedra2017_late_C;19.498545;-104.931694;tropical dry forest succession, Chamela<6C>Cuixmala Biosphere Reserve, forest succession - recently abandoned pastures, early successional forests abandoned and intermediate successional forests abandoned, Mexico;leaf chewing;NoOrthoptera;2010
|
||||
Sanjaya2016;Sanjaya2016;-6.862444;107.595211;Botanical Garden UPI, Bandung, Indonesia;fruit eater;NoOrthoptera;2012
|
||||
Santos;Santos2006;-29.8;-51.77; Taquari, Rio Grande do Sul, Bazil;leaf miner;NoOrthoptera;2004
|
||||
SantosdeAraujo2019;SantosdeAraujo2019;48.318611;18.081944;Nitra City Park, Nitra, Slovakia;gall maker;NoOrthoptera;2008
|
||||
seifert;seifert2020_A;42.71;141.6;temperate lowland forest, Tomakomai, Hokkaido, Japan;leaf chewing;NoOrthoptera;2015
|
||||
seifert;seifert2020_B;38.9;-78.41;temperate lowland forest, Toms Brook, Virginia, USA;leaf chewing;NoOrthoptera;2017
|
||||
seifert;seifert2020_C;48.7;16.95;temperate lowland forest, Lanzhot , Czech Republic;leaf chewing;NoOrthoptera;2015
|
||||
shimada;shimada2020;35.039324;135.187705;paddy fields and grasslands, agricultural landscape, Japan;sap sucker;NoOrthoptera;2019
|
||||
Shinohara;Shinohara2019_Abandoned_1;35.495752;135.893636;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Abandoned_2;35.507486;135.909609;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Abandoned_3;35.489386;135.897067;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Abandoned_4;35.565453;135.908289;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Extensively_managed_1;35.498043;135.895563;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Extensively_managed_2;35.50826;135.908896;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Extensively_managed_3;35.489949;135.89785;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Extensively_managed_4;35.561526;135.905618;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Intensively_managed_1;35.498448;135.898817;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Intensively_managed_2;35.509203;135.907169;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Intensively_managed_3;35.489111;135.89987;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Shinohara;Shinohara2019_Intensively_managed_4;35.563264;135.907279;seminatural grasslands, agricultural landscape, Wakasa town, Fukui Prefecture in central Japan, Japan;mixed;NoOrthoptera;2016
|
||||
Silveira2021;Silveira2021_nonpreserved;-16.788637;-43.880394;Neotropical savanna areas in anthropized landscapes, northern Minas Gerais State, Brazil;mixed;mixed;2019
|
||||
Silveira2021;Silveira2021_preserved;-17.358135;-44.259766;Neotropical savanna areas in preserved landscapes, northern Minas Gerais State, Brazil;mixed;mixed;2019
|
||||
stary;stary2008;49.51;14.93;managed forests, Techobuz, Czech;sap sucker;NoOrthoptera;2007
|
||||
sugiura;sugiura2020;26.67;142.98;many endemic organisms: vascular plants and insects, Ogasawara Village, Tokyo metropolitan, Japan;mixed;NoOrthoptera;2008
|
||||
Sugiura2010;Sugiura2010;27.073072;142.217277;subtropical Ogasawara islands, Japan;mixed;NoOrthoptera;2008
|
||||
Szpeiner2008;Szpeiner2008;-31.403573;-64.144999;ornemental plants, Cordoba city, Argentina;sap sucker;NoOrthoptera;2002
|
||||
Tahar2015;Tahar2015;34.834667;5.738862;Biskra province, Algeria;sap sucker;NoOrthoptera;2014
|
||||
tavakilian;tavakilian1997;5.41;-52.97;swamp forests to mixed forests on well-drained soil, Sinnamary River Basin, French Guiana;deadwood eater;NoOrthoptera;1993
|
||||
tcharntke;tcharntke;49;8.32; Karlsruhe, southwest Germany;leaf miner;NoOrthoptera;1989
|
||||
Todorov2014;Todorov2014;42.157088;24.82175;Experimentally Field of Agriculture University, Agro-ecosystems, Plovdiv Region, Bulgaria;sap sucker;NoOrthoptera;2006
|
||||
ueckert;ueckert;40.17;-103.21;mixed-grass prairie, 27 km north of Akron, Washington county, Colorado;leaf chewing;Orthoptera;1968
|
||||
Volf2017;Volf2017_Tomakomai;42.716667;141.6;Tomakomai forest, Japan;mixed;NoOrthoptera;2014
|
||||
Volf2017;Volf2017_Lanzhot;48.8;17.083333;Lanzhot forest, Czech Republic;mixed;NoOrthoptera;2014
|
||||
Volf2017;Volf2017_Mikulcice;48.683333;16.933333;Mikulcice forest, Czech Republic;mixed;NoOrthoptera;2014
|
||||
Xi2020;Xi2020;32.763004;102.518629;alpine meadow in the eastern Qinghai-Tibetan Plateau, Hongyuan County, Sichuan Province, China;seed predator;NoOrthoptera;2016
|
||||
Zhu;Zhu2018;23.45;111.883333;subtropical forest, subtropical forest located within Heishiding Nature Reserve, south China, China;leaf chewing;NoOrthoptera;2014
|
||||
|
BIN
Rcodes/real_data/data/supinfo.xlsx
Normal file
BIN
Rcodes/real_data/data/supinfo.xlsx
Normal file
Binary file not shown.
115
Rcodes/real_data/netclustering_CoOPLBM_completed.R
Normal file
115
Rcodes/real_data/netclustering_CoOPLBM_completed.R
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
require("ggplot2")
|
||||
require("tictoc")
|
||||
require("tidyverse")
|
||||
|
||||
devtools::load_all("R/")
|
||||
|
||||
# Importation of data
|
||||
interaction_data <- read.table(file = "real_data/data/interaction-data.txt", sep = "\t", header = TRUE)
|
||||
|
||||
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
|
||||
names_aggreg_networks <- sapply(
|
||||
seq_ids_network_aggreg,
|
||||
function(id) {
|
||||
paste0(
|
||||
unique(interaction_data[which(interaction_data$id_network_aggreg == id), ]$web),
|
||||
collapse = "+"
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
# Full data
|
||||
if (!file.exists("real_data/data/dore-matrices.Rds")) {
|
||||
# Computation of incidence matrices
|
||||
incidence_matrices <- lapply(
|
||||
seq_ids_network_aggreg,
|
||||
function(m) {
|
||||
current_interaction_data <- interaction_data[which(interaction_data$id_network_aggreg == m), ] %>%
|
||||
mutate(
|
||||
plantaggreg = paste(plantorder,
|
||||
plantfamily, plantgenus, plantspecies,
|
||||
sep = " "
|
||||
),
|
||||
insectaggreg = paste(insectorder,
|
||||
insectfamily, insectgenus, insectspecies,
|
||||
sep = " "
|
||||
)
|
||||
)
|
||||
current_interaction_data <- table(current_interaction_data$insectaggreg, current_interaction_data$plantaggreg)
|
||||
|
||||
current_incidence_matrix <- matrix(current_interaction_data,
|
||||
ncol = ncol(current_interaction_data), dimnames = dimnames(current_interaction_data)
|
||||
)
|
||||
|
||||
current_incidence_matrix[which(current_incidence_matrix > 0)] <- 1
|
||||
return(current_incidence_matrix)
|
||||
}
|
||||
)
|
||||
|
||||
names(incidence_matrices) <- names_aggreg_networks
|
||||
|
||||
saveRDS(incidence_matrices, file = "real_data/data/dore-matrices.Rds")
|
||||
} else {
|
||||
incidence_matrices <- readRDS(file = "real_data/data/dore-matrices.Rds")
|
||||
}
|
||||
# Emre completed Data
|
||||
completed_networks_ids <- as.numeric(names(readRDS("real_data/data/Data.rds")))
|
||||
completed_networks_names <- sapply(
|
||||
completed_networks_ids,
|
||||
function(id) {
|
||||
paste0(
|
||||
unique(interaction_data[which(interaction_data$id_network_aggreg == id), ]$web),
|
||||
collapse = "+"
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
uncompleted <- incidence_matrices[match(completed_networks_names, names(incidence_matrices))]
|
||||
point_2_completed <- setNames(readRDS("real_data/data/completed0.2.rds"), completed_networks_names)
|
||||
point_5_completed <- setNames(readRDS("real_data/data/completed0.5.rds"), completed_networks_names)
|
||||
random_completed <- setNames(readRDS("real_data/data/completedrandom.rds"), completed_networks_names)
|
||||
|
||||
if (!exists("arg")) {
|
||||
arg <- commandArgs(trailingOnly = TRUE)
|
||||
}
|
||||
|
||||
if (identical(arg, character(0))) {
|
||||
number_of_net <- length(uncompleted)
|
||||
model <- "iid"
|
||||
nb_run <- 1
|
||||
source_data <- eval(as.name("uncompleted"))
|
||||
data_name <- "uncompleted"
|
||||
} else {
|
||||
number_of_net <- as.numeric(arg[1])
|
||||
model <- arg[2]
|
||||
nb_run <- as.numeric(arg[3])
|
||||
source_data <- eval(as.name(arg[4]))
|
||||
data_name <- arg[4]
|
||||
}
|
||||
print(number_of_net)
|
||||
print(model)
|
||||
print(nb_run)
|
||||
print(data_name)
|
||||
tic()
|
||||
list_collection <- clusterize_bipartite_networks(
|
||||
netlist = source_data[1:number_of_net],
|
||||
net_id = names(source_data)[1:number_of_net],
|
||||
colsbm_model = model,
|
||||
nb_run = nb_run,
|
||||
global_opts = list(
|
||||
nb_cores = parallel::detectCores() - 1, verbosity = 4,
|
||||
plot_details = 0
|
||||
),
|
||||
silent_parallelization = FALSE
|
||||
)
|
||||
toc()
|
||||
|
||||
saveRDS(list_collection, file = paste0(
|
||||
"real_data/data/",
|
||||
"dore_",
|
||||
data_name,
|
||||
"_collection_clustering_nb_run_", nb_run, "_", model, "_",
|
||||
number_of_net, "_networks_",
|
||||
format(Sys.time(), "%d-%m-%y-%X"),
|
||||
".Rds"
|
||||
))
|
||||
90
Rcodes/real_data/netclustering_check_real_data.R
Normal file
90
Rcodes/real_data/netclustering_check_real_data.R
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
require("ggplot2")
|
||||
require("tictoc")
|
||||
require("tidyverse")
|
||||
|
||||
devtools::load_all("R/")
|
||||
|
||||
# Importation of data
|
||||
if (!file.exists("real_data/data/dore-matrices.Rds")) {
|
||||
interaction_data <- read.table(file = "real_data/data/interaction-data.txt", sep = "\t", header = TRUE)
|
||||
|
||||
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
|
||||
names_aggreg_networks <- sapply(
|
||||
seq_ids_network_aggreg,
|
||||
function(id) {
|
||||
paste0(
|
||||
unique(interaction_data[which(interaction_data$id_network_aggreg == id), ]$web),
|
||||
collapse = "+"
|
||||
)
|
||||
}
|
||||
)
|
||||
# Computation of incidence matrices
|
||||
incidence_matrices <- lapply(
|
||||
seq_ids_network_aggreg,
|
||||
function(m) {
|
||||
current_interaction_data <- interaction_data[which(interaction_data$id_network_aggreg == m), ] %>%
|
||||
mutate(
|
||||
plantaggreg = paste(plantorder,
|
||||
plantfamily, plantgenus, plantspecies,
|
||||
sep = "-"
|
||||
),
|
||||
insectaggreg = paste(insectorder,
|
||||
insectfamily, insectgenus, insectspecies,
|
||||
sep = "-"
|
||||
)
|
||||
)
|
||||
current_interaction_data <- table(current_interaction_data$plantaggreg, current_interaction_data$insectaggreg)
|
||||
|
||||
current_incidence_matrix <- matrix(current_interaction_data,
|
||||
ncol = ncol(current_interaction_data), dimnames = dimnames(current_interaction_data)
|
||||
)
|
||||
|
||||
current_incidence_matrix[which(current_incidence_matrix > 0)] <- 1
|
||||
return(current_incidence_matrix)
|
||||
}
|
||||
)
|
||||
|
||||
names(incidence_matrices) <- names_aggreg_networks
|
||||
|
||||
saveRDS(incidence_matrices, file = "real_data/data/dore-matrices.Rds")
|
||||
} else {
|
||||
incidence_matrices <- readRDS(file = "real_data/data/dore-matrices.Rds")
|
||||
}
|
||||
|
||||
if (!exists("arg")) {
|
||||
arg <- commandArgs(trailingOnly = TRUE)
|
||||
}
|
||||
|
||||
if (identical(arg, character(0))) {
|
||||
number_of_net <- length(incidence_matrices)
|
||||
model <- "pirho"
|
||||
nb_run <- 3
|
||||
} else {
|
||||
number_of_net <- as.numeric(arg[1])
|
||||
model <- arg[2]
|
||||
nb_run <- as.numeric(arg[3])
|
||||
}
|
||||
print(number_of_net)
|
||||
print(model)
|
||||
print(nb_run)
|
||||
tic()
|
||||
list_collection <- clusterize_bipartite_networks(
|
||||
netlist = incidence_matrices[1:number_of_net],
|
||||
net_id = names(incidence_matrices)[1:number_of_net],
|
||||
colsbm_model = model,
|
||||
nb_run = nb_run,
|
||||
global_opts = list(
|
||||
nb_cores = parallel::detectCores() - 1, verbosity = 4,
|
||||
plot_details = 0
|
||||
),
|
||||
silent_parallelization = TRUE
|
||||
)
|
||||
toc()
|
||||
|
||||
saveRDS(list_collection, file = paste0(
|
||||
"real_data/data/",
|
||||
"dore_collection_clustering_nb_run", nb_run,"_",model,"_",
|
||||
number_of_net, "networks_",
|
||||
format(Sys.time(), "%d-%m-%y-%X"),
|
||||
".Rds"
|
||||
))
|
||||
76
Rcodes/real_data/netclustering_dore_analyze.R
Normal file
76
Rcodes/real_data/netclustering_dore_analyze.R
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
require("ggplot2")
|
||||
devtools::load_all("R/")
|
||||
|
||||
## Preparing data for analysis
|
||||
interaction_data <- read.table(file = "real_data/data/interaction-data.txt", sep = "\t", header = TRUE)
|
||||
|
||||
insect_orders <- unique(interaction_data$insectorder)
|
||||
plant_family <- unique(interaction_data$plantfamily)
|
||||
|
||||
# All results
|
||||
iid_clustering <- readRDS("real_data/data/dore_collection_clustering_nb_run1_iid_123networks_24-05-23-21:40:42.Rds")
|
||||
iid_best_partition <- extract_best_bipartite_partition(iid_clustering)
|
||||
iid_unlist <- unlist(iid_best_partition)
|
||||
|
||||
|
||||
rho_clustering <- readRDS("real_data/data/dore_collection_clustering_nb_run1_rho_123networks_25-05-23-13:58:30.Rds")
|
||||
rho_best_partition <- extract_best_bipartite_partition(rho_clustering)
|
||||
rho_unlist <- unlist(rho_best_partition)
|
||||
|
||||
pi_clustering <- readRDS("real_data/data/dore_collection_clustering_nb_run1_pi_123networks_25-05-23-17:31:25.Rds")
|
||||
pi_best_partition <- extract_best_bipartite_partition(pi_clustering)
|
||||
pi_unlist <- unlist(pi_best_partition)
|
||||
|
||||
pirho_clustering <- readRDS("real_data/data/dore_collection_clustering_nb_run1_pirho_123networks_26-05-23-19:22:55.Rds")
|
||||
pirho_best_partition <- extract_best_bipartite_partition(pirho_clustering)
|
||||
pirho_unlist <- unlist(pirho_best_partition)
|
||||
|
||||
### Matching taxonomy
|
||||
taxonomy_in_clusters <- function(unlisted_model) {
|
||||
lapply(seq_len(length(unlisted_model)), function(col_idx) {
|
||||
# Per collection
|
||||
# Empty init
|
||||
insect_count <- t(sapply(insect_orders, function(order) {
|
||||
out_count <- rep(0, unlisted_model[[col_idx]]$Q[2])
|
||||
out_count
|
||||
}))
|
||||
|
||||
plant_count <- t(sapply(plant_family, function(order) {
|
||||
out_count <- rep(0, unlisted_model[[col_idx]]$Q[1])
|
||||
out_count
|
||||
}))
|
||||
|
||||
for (m in seq.int(unlisted_model[[col_idx]]$M)) {
|
||||
#### Insect
|
||||
insect_names <- names(unlisted_model[[col_idx]]$Z[[1]][[2]])
|
||||
|
||||
insect_count <- insect_count + t(sapply(insect_orders, function(order) {
|
||||
out_count <- rep(0, unlisted_model[[col_idx]]$Q[2])
|
||||
names(out_count) <- seq.int(unlisted_model[[col_idx]]$Q[2])
|
||||
insect_count <- table(unlisted_model[[col_idx]]$Z[[m]][[2]][grep(order, insect_names)])
|
||||
out_count[names(insect_count)] <- insect_count
|
||||
out_count
|
||||
}))
|
||||
#### Plants
|
||||
plant_names <- names(unlisted_model[[col_idx]]$Z[[1]][[1]])
|
||||
|
||||
plant_count <- t(sapply(plant_family, function(order) {
|
||||
out_count <- rep(0, unlisted_model[[col_idx]]$Q[1])
|
||||
names(out_count) <- seq.int(unlisted_model[[col_idx]]$Q[1])
|
||||
plant_count <- table(unlisted_model[[col_idx]]$Z[[m]][[1]][grep(order, plant_names)])
|
||||
out_count[names(plant_count)] <- plant_count
|
||||
out_count
|
||||
}))
|
||||
}
|
||||
return(list(insects = insect_count, plants = plant_count))
|
||||
})
|
||||
}
|
||||
|
||||
taxonomy_remove_empty <- function(taxonomy_collections_list) {
|
||||
lapply(taxonomy_collections_list, function(collection) {
|
||||
list(
|
||||
insects = collection$insects[which(rowSums(collection$insects != 0) > 0), ],
|
||||
plants = collection$plants[which(rowSums(collection$plants != 0) > 0), ]
|
||||
)
|
||||
})
|
||||
}
|
||||
526
Rcodes/real_data/presentation_dore.Rmd
Normal file
526
Rcodes/real_data/presentation_dore.Rmd
Normal file
|
|
@ -0,0 +1,526 @@
|
|||
```{r, setup, include=FALSE, warning=FALSE}
|
||||
knitr::opts_chunk$set(echo = FALSE)
|
||||
```
|
||||
|
||||
```{r require_lib, echo = FALSE, include=FALSE, warning=FALSE}
|
||||
require("tidyverse")
|
||||
require("knitr")
|
||||
require("colSBM")
|
||||
source("temporary_plot.R")
|
||||
```
|
||||
|
||||
```{r pretty_matrix_print, echo = FALSE, warning=FALSE}
|
||||
# Define a generic method that transforms an object x in a LaTeX string
|
||||
as_latex <- function(x, ...) {
|
||||
UseMethod("as_latex", x)
|
||||
}
|
||||
|
||||
# Define a class latex for LaTeX expressions
|
||||
as_latex.character <- function(x) {
|
||||
structure(
|
||||
paste(x, collapse = " "),
|
||||
class = c("latex", "character")
|
||||
)
|
||||
}
|
||||
|
||||
# A character string of class latex is rendered in display mode
|
||||
# Define a knit_print() method for the latex class
|
||||
knit_print.latex <- function(x, ...) {
|
||||
knitr::asis_output(
|
||||
paste0("$$", x, "$$")
|
||||
)
|
||||
}
|
||||
|
||||
# Now, define a method as_latex for matrix
|
||||
as_latex.matrix <- function(x, ...) {
|
||||
as_latex(c(
|
||||
"\\begin{pmatrix}",
|
||||
paste(
|
||||
t(x),
|
||||
rep(c(rep("&", nrow(x) - 1), "\\\\"), ncol(x)),
|
||||
collapse = ""
|
||||
),
|
||||
"\\end{pmatrix}"
|
||||
))
|
||||
}
|
||||
|
||||
# Indicate to knitr that matrix are rendered as latex
|
||||
knit_print.matrix <- function(x, ...) {
|
||||
knitr::knit_print(as_latex(round(x, 2)))
|
||||
}
|
||||
```
|
||||
|
||||
```{r better_collection_extraction, echo = FALSE, warning=FALSE}
|
||||
extract_unlist_reorder <- function(clustering_data_path) {
|
||||
clustering <- readRDS(clustering_data_path)
|
||||
if (!is.list(clustering)) {
|
||||
clustering <- list(clustering)
|
||||
}
|
||||
best_partition <- extract_best_bipartite_partition(clustering)
|
||||
best_partition_unlist <- unlist(best_partition)
|
||||
if (!is.list(best_partition_unlist)) {
|
||||
best_partition_unlist <- list(best_partition_unlist)
|
||||
}
|
||||
size <- length(best_partition_unlist)
|
||||
return(setNames(lapply(best_partition_unlist, function(collection) {
|
||||
reorder_parameters(collection)
|
||||
}), paste(rep("Collection", size), seq_len(size))))
|
||||
}
|
||||
|
||||
meso_print <- function(unlisted_partition) {
|
||||
for (idx in seq_along(unlisted_partition)) {
|
||||
cat(paste("\\subsubsection{Pour la collection", idx, "}"))
|
||||
print(plot(unlisted_partition[[idx]], type = "meso", mixture = TRUE))
|
||||
cat("\\newline \\tiny")
|
||||
print(knitr::kable(unlisted_partition[[idx]]$net_id,
|
||||
col.names = "Networks",
|
||||
format = "latex",
|
||||
position = "!h",
|
||||
booktabs = TRUE
|
||||
))
|
||||
cat("\\normalsize\\newline")
|
||||
cat(knitr::knit_print(unlisted_partition[[idx]]$alpha))
|
||||
}
|
||||
}
|
||||
|
||||
alpha_print <- function(unlisted_partition) {
|
||||
for (idx in seq_along(unlisted_partition)) {
|
||||
cat(paste("\nPour la collection ", idx, ":\n"))
|
||||
cat(knitr::knit_print(unlisted_partition[[idx]]$alpha))
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
```{r taxonomy_functions, echo = FALSE, warning=FALSE}
|
||||
interaction_data <- read.table(file = "data/interaction-data.txt", sep = "\t", header = TRUE)
|
||||
|
||||
insect_orders <- unique(interaction_data$insectorder)
|
||||
plant_family <- unique(interaction_data$plantorder)
|
||||
|
||||
insect_orders[is.na(insect_orders)] <- "NA"
|
||||
plant_family[is.na(plant_family)] <- "NA"
|
||||
### Matching taxonomy
|
||||
taxonomy_in_clusters <- function(unlisted_model) {
|
||||
if (is.list(unlisted_model)) {
|
||||
lapply(seq_len(length(unlisted_model)), function(col_idx) {
|
||||
# Per collection
|
||||
# Empty init
|
||||
insect_count <- t(sapply(insect_orders, function(order) {
|
||||
out_count <- rep(0, unlisted_model[[col_idx]]$Q[2])
|
||||
out_count
|
||||
}))
|
||||
|
||||
plant_count <- t(sapply(plant_family, function(order) {
|
||||
out_count <- rep(0, unlisted_model[[col_idx]]$Q[1])
|
||||
out_count
|
||||
}))
|
||||
|
||||
for (m in seq.int(unlisted_model[[col_idx]]$M)) {
|
||||
#### Insect
|
||||
insect_names <- names(unlisted_model[[col_idx]]$Z[[1]][[2]])
|
||||
|
||||
insect_count <- insect_count + t(sapply(insect_orders, function(order) {
|
||||
out_count <- rep(0, unlisted_model[[col_idx]]$Q[2])
|
||||
names(out_count) <- seq.int(unlisted_model[[col_idx]]$Q[2])
|
||||
insect_count <- table(unlisted_model[[col_idx]]$Z[[m]][[2]][grep(order, insect_names)])
|
||||
out_count[names(insect_count)] <- insect_count
|
||||
out_count
|
||||
}))
|
||||
#### Plants
|
||||
plant_names <- names(unlisted_model[[col_idx]]$Z[[1]][[1]])
|
||||
|
||||
plant_count <- t(sapply(plant_family, function(order) {
|
||||
out_count <- rep(0, unlisted_model[[col_idx]]$Q[1])
|
||||
names(out_count) <- seq.int(unlisted_model[[col_idx]]$Q[1])
|
||||
plant_count <- table(unlisted_model[[col_idx]]$Z[[m]][[1]][grep(order, plant_names)])
|
||||
out_count[names(plant_count)] <- plant_count
|
||||
out_count
|
||||
}))
|
||||
}
|
||||
return(list(insects = insect_count, plants = plant_count))
|
||||
})
|
||||
} else {
|
||||
# Per collection
|
||||
# Empty init
|
||||
insect_count <- t(sapply(insect_orders, function(order) {
|
||||
out_count <- rep(0, unlisted_model$Q[2])
|
||||
out_count
|
||||
}))
|
||||
|
||||
plant_count <- t(sapply(plant_family, function(order) {
|
||||
out_count <- rep(0, unlisted_model$Q[1])
|
||||
out_count
|
||||
}))
|
||||
|
||||
for (m in seq.int(unlisted_model$M)) {
|
||||
#### Insect
|
||||
insect_names <- names(unlisted_model$Z[[1]][[2]])
|
||||
|
||||
insect_count <- insect_count + t(sapply(insect_orders, function(order) {
|
||||
out_count <- rep(0, unlisted_model$Q[2])
|
||||
names(out_count) <- seq.int(unlisted_model$Q[2])
|
||||
insect_count <- table(unlisted_model$Z[[m]][[2]][grep(order, insect_names)])
|
||||
out_count[names(insect_count)] <- insect_count
|
||||
out_count
|
||||
}))
|
||||
#### Plants
|
||||
plant_names <- names(unlisted_model$Z[[1]][[1]])
|
||||
|
||||
plant_count <- t(sapply(plant_family, function(order) {
|
||||
out_count <- rep(0, unlisted_model$Q[1])
|
||||
names(out_count) <- seq.int(unlisted_model$Q[1])
|
||||
plant_count <- table(unlisted_model$Z[[m]][[1]][grep(order, plant_names)])
|
||||
out_count[names(plant_count)] <- plant_count
|
||||
out_count
|
||||
}))
|
||||
}
|
||||
return(list(list(insects = insect_count, plants = plant_count)))
|
||||
}
|
||||
}
|
||||
|
||||
taxonomy_remove_empty <- function(taxonomy_collections_list) {
|
||||
lapply(taxonomy_collections_list, function(collection) {
|
||||
list(
|
||||
insects = collection$insects[which(rowSums(collection$insects != 0) > 0), ],
|
||||
plants = collection$plants[which(rowSums(collection$plants != 0) > 0), ]
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
get_formatted_data <- function(collection, group, max_rank = 6) {
|
||||
collection[[group]] %>%
|
||||
as.data.frame() %>% # Transformation en data frame
|
||||
mutate(
|
||||
Total = rowSums(.),
|
||||
Rang = rank(-Total, ties.method = "min")
|
||||
) %>% # Creation d'une colonne Total
|
||||
rownames_to_column(var = "TaxonBrut") %>%
|
||||
mutate(Taxon = ifelse(Rang <= max_rank & Total > 0, TaxonBrut, "Other")) %>%
|
||||
arrange(Rang) %>%
|
||||
mutate(Taxon = factor(Taxon, levels = unique(Taxon))) %>%
|
||||
select(-Total, -TaxonBrut, -Rang) %>%
|
||||
pivot_longer(
|
||||
cols = -c("Taxon"),
|
||||
names_to = "Bloc",
|
||||
values_to = "Nombre",
|
||||
names_prefix = "V"
|
||||
) %>%
|
||||
group_by(Taxon, Bloc) %>%
|
||||
summarise_all(sum) %>%
|
||||
ungroup() %>%
|
||||
group_by(Bloc) %>%
|
||||
mutate(Proportion = Nombre / sum(Nombre)) %>%
|
||||
ungroup() %>%
|
||||
mutate(Group = group)
|
||||
}
|
||||
|
||||
taxonomy_plot <- function(data, insects_or_plants, model, stack_or_fill) {
|
||||
plots <- filter(data, Group == insects_or_plants) %>%
|
||||
ggplot(aes(x = Bloc, y = Nombre, fill = Taxon)) +
|
||||
geom_bar(stat = "identity", position = stack_or_fill) +
|
||||
labs(x = "Block", y = "Number of Nodes", fill = "Taxonomy") +
|
||||
theme_minimal() +
|
||||
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
|
||||
ggtitle(paste(
|
||||
ifelse(insects_or_plants == "insects",
|
||||
"Pollinators", "Plants"
|
||||
), "repartition (",
|
||||
ifelse(stack_or_fill == "stack", "absolute", "proportion"),
|
||||
") in the", model, "clustering"
|
||||
)) +
|
||||
facet_wrap(~Collection, ncol = 3, scales = "free_x")
|
||||
|
||||
# Arrange the plots in a grid layout
|
||||
gridExtra::grid.arrange(plots, newpage = TRUE)
|
||||
}
|
||||
```
|
||||
|
||||
```{r load data, echo = FALSE, include = FALSE, warning=FALSE}
|
||||
# All results
|
||||
iid_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_iid_123networks_24-05-23-21:40:42.Rds")
|
||||
|
||||
rho_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_rho_123networks_25-05-23-13:58:30.Rds")
|
||||
|
||||
pi_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_pi_123networks_25-05-23-17:31:25.Rds")
|
||||
|
||||
pirho_unlist <- extract_unlist_reorder("data/dore_collection_clustering_nb_run1_pirho_123networks_26-05-23-19:22:55.Rds")
|
||||
```
|
||||
# Application to \cite{doreRelativeEffectsAnthropogenic2021} data
|
||||
\label{sec:application-to-dorerelativeeffectsanthropogenic2021-data}
|
||||
|
||||
## Clustering with model iid
|
||||
With the *iid-colBiSBM* we obtain `r length(iid_unlist)` collections with the
|
||||
following structures:
|
||||
|
||||
```{r iid_meso_plot, echo = FALSE, message=FALSE, results="asis", warning=FALSE}
|
||||
#| fig.cap=paste(names(iid_unlist), rep("- iid",length(iid_unlist))),
|
||||
#| fig.asp = 0.5,
|
||||
#| dpi = 300
|
||||
meso_print(iid_unlist)
|
||||
```
|
||||
|
||||
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
|
||||
|
||||
```{r iid_alpha, echo = FALSE, results="asis", warning=FALSE}
|
||||
alpha_print(iid_unlist)
|
||||
```
|
||||
### Comparaison avec des infos supplémentaires
|
||||
```{r supinfo, echo = FALSE}
|
||||
supinfo <- readxl::read_xlsx("data/supinfo.xlsx", sheet = 2)
|
||||
interaction_data <- read.table(file = "data/interaction-data.txt", sep = "\t", header = TRUE)
|
||||
|
||||
seq_ids_network_aggreg <- unique(interaction_data$id_network_aggreg)
|
||||
incidence_matrices <- readRDS(file = "data/dore-matrices.Rds")
|
||||
names_aggreg_networks <- names(incidence_matrices)
|
||||
vectorClusteringNet <- numeric(nrow(supinfo))
|
||||
for (k in 1:length(iid_unlist)) {
|
||||
idclust <- match(iid_unlist[[k]]$net_id, names_aggreg_networks)
|
||||
supinfoclust <- match(seq_ids_network_aggreg[idclust], supinfo$Idweb)
|
||||
vectorClusteringNet[supinfoclust] <- k
|
||||
}
|
||||
```
|
||||
|
||||
```{r Annual_timespan_plot, echo = FALSE}
|
||||
ggplot(supinfo) +
|
||||
aes(
|
||||
y = Annual_time_span,
|
||||
x = vectorClusteringNet, group = vectorClusteringNet, fill = as.factor(vectorClusteringNet)
|
||||
) +
|
||||
xlab("Numéro de collection") +
|
||||
ylab("Annual time span") +
|
||||
guides(fill = guide_legend(title = "Numéro\nde collection")) +
|
||||
geom_boxplot()
|
||||
```
|
||||
|
||||
### Répartition dans les clusters selon la taxonomie
|
||||
```{r iid_taxonomy, echo = FALSE, warning=FALSE}
|
||||
iid_taxonomy <- taxonomy_in_clusters(iid_unlist)
|
||||
|
||||
iid_taxonomy_long <- map_dfr(iid_taxonomy,
|
||||
function(collection) {
|
||||
map_dfr(
|
||||
c("insects", "plants"),
|
||||
function(group) {
|
||||
get_formatted_data(collection, group)
|
||||
}
|
||||
)
|
||||
},
|
||||
.id = "Collection"
|
||||
)
|
||||
```
|
||||
|
||||
```{r iid_plot_taxonomy_pollinators, echo = FALSE, message = FALSE,fig.cap = 'Pollinators repartition for the iid model regarding taxonomy', warning=FALSE}
|
||||
# Pollinators
|
||||
taxonomy_plot(
|
||||
data = iid_taxonomy_long,
|
||||
insects_or_plants = "insects",
|
||||
stack_or_fill = "stack", model = "iid"
|
||||
)
|
||||
taxonomy_plot(
|
||||
data = iid_taxonomy_long,
|
||||
insects_or_plants = "insects",
|
||||
stack_or_fill = "fill", model = "iid"
|
||||
)
|
||||
```
|
||||
|
||||
```{r iid_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the iid model regarding taxonomy', warning=FALSE}
|
||||
taxonomy_plot(
|
||||
data = iid_taxonomy_long,
|
||||
insects_or_plants = "plants",
|
||||
stack_or_fill = "stack", model = "iid"
|
||||
)
|
||||
taxonomy_plot(
|
||||
data = iid_taxonomy_long,
|
||||
insects_or_plants = "plants",
|
||||
stack_or_fill = "fill", model = "iid"
|
||||
)
|
||||
```
|
||||
|
||||
#### Tables
|
||||
```{r iid_taxo_tables, echo = FALSE}
|
||||
iid_taxonomy_long %>%
|
||||
filter(Group == "insects") %>%
|
||||
group_by(Taxon) %>%
|
||||
mutate(row_id = row_number()) %>%
|
||||
pivot_wider(names_from = c(Collection, Bloc), values_from = Nombre, names_glue = "Collection_{Collection}_Bloc_{Bloc}") %>%
|
||||
ungroup() %>%
|
||||
select(-row_id) %>%
|
||||
group_by(Taxon) %>%
|
||||
summarize(across(starts_with("Collection"), ~ na.omit(.)[1])) %>%
|
||||
ungroup()
|
||||
```
|
||||
|
||||
|
||||
## Clustering with model pi
|
||||
Avec le modèle *pi* nous obtenons les `r length(pi_unlist)` collections et
|
||||
les structures suivantes:
|
||||
|
||||
```{r pi_meso_plot, echo = FALSE, message=FALSE, fig.cap=paste(names(pi_unlist), rep("- pi",length(pi_unlist))), results="asis", warning=FALSE}
|
||||
meso_print(pi_unlist)
|
||||
```
|
||||
|
||||
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
|
||||
|
||||
```{r pi_alpha, echo = FALSE, results="asis", warning=FALSE}
|
||||
alpha_print(pi_unlist)
|
||||
```
|
||||
|
||||
### Répartition dans les clusters selon la taxonomie
|
||||
```{r pi_taxonomy, echo = FALSE, warning=FALSE}
|
||||
pi_taxonomy <- taxonomy_in_clusters(pi_unlist)
|
||||
|
||||
pi_taxonomy_long <- map_dfr(pi_taxonomy,
|
||||
function(collection) {
|
||||
map_dfr(
|
||||
c("insects", "plants"),
|
||||
function(group) {
|
||||
get_formatted_data(collection, group)
|
||||
}
|
||||
)
|
||||
},
|
||||
.id = "Collection"
|
||||
)
|
||||
```
|
||||
|
||||
```{r pi_plot_taxonomy_pollinators, echo = FALSE, message = FALSE,fig.cap = 'Pollinators repartition for the pi model regarding taxonomy', warning=FALSE}
|
||||
# Pollinators
|
||||
taxonomy_plot(
|
||||
data = pi_taxonomy_long,
|
||||
insects_or_plants = "insects",
|
||||
stack_or_fill = "stack", model = "pi"
|
||||
)
|
||||
taxonomy_plot(
|
||||
data = pi_taxonomy_long,
|
||||
insects_or_plants = "insects",
|
||||
stack_or_fill = "fill", model = "pi"
|
||||
)
|
||||
```
|
||||
|
||||
```{r pi_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the pi model regarding taxonomy', warning=FALSE}
|
||||
taxonomy_plot(
|
||||
data = pi_taxonomy_long,
|
||||
insects_or_plants = "plants",
|
||||
stack_or_fill = "stack", model = "pi"
|
||||
)
|
||||
taxonomy_plot(
|
||||
data = pi_taxonomy_long,
|
||||
insects_or_plants = "plants",
|
||||
stack_or_fill = "fill", model = "pi"
|
||||
)
|
||||
```
|
||||
|
||||
## Clustering with model rho
|
||||
Avec le modèle *rho* nous obtenons les `r length(rho_unlist)` collections et
|
||||
les structures suivantes:
|
||||
|
||||
```{r rho_meso_plot, echo = FALSE, message=FALSE, fig.cap=paste(names(rho_unlist), rep("- rho",length(rho_unlist))), results="asis", warning=FALSE}
|
||||
meso_print(rho_unlist)
|
||||
```
|
||||
|
||||
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
|
||||
|
||||
```{r rho_alpha, echo = FALSE, results="asis", warning=FALSE}
|
||||
alpha_print(rho_unlist)
|
||||
```
|
||||
|
||||
### Répartition dans les clusters selon la taxonomie
|
||||
```{r rho_taxonomy, echo = FALSE, warning=FALSE}
|
||||
rho_taxonomy <- taxonomy_in_clusters(rho_unlist)
|
||||
|
||||
rho_taxonomy_long <- map_dfr(rho_taxonomy,
|
||||
function(collection) {
|
||||
map_dfr(
|
||||
c("insects", "plants"),
|
||||
function(group) {
|
||||
get_formatted_data(collection, group)
|
||||
}
|
||||
)
|
||||
},
|
||||
.id = "Collection"
|
||||
)
|
||||
```
|
||||
|
||||
```{r rho_plot_taxonomy_pollinators, echo = FALSE, message = FALSE,fig.cap = 'Pollinators repartition for the rho model regarding taxonomy', warning=FALSE}
|
||||
# Pollinators
|
||||
taxonomy_plot(
|
||||
data = rho_taxonomy_long,
|
||||
insects_or_plants = "insects",
|
||||
stack_or_fill = "stack", model = "rho"
|
||||
)
|
||||
taxonomy_plot(
|
||||
data = rho_taxonomy_long,
|
||||
insects_or_plants = "insects",
|
||||
stack_or_fill = "fill", model = "rho"
|
||||
)
|
||||
```
|
||||
|
||||
```{r rho_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the rho model regarding taxonomy', warning=FALSE}
|
||||
taxonomy_plot(
|
||||
data = rho_taxonomy_long,
|
||||
insects_or_plants = "plants",
|
||||
stack_or_fill = "stack", model = "rho"
|
||||
)
|
||||
taxonomy_plot(
|
||||
data = rho_taxonomy_long,
|
||||
insects_or_plants = "plants",
|
||||
stack_or_fill = "fill", model = "rho"
|
||||
)
|
||||
```
|
||||
|
||||
## Clustering with model pirho
|
||||
Avec le modèle *pirho* nous obtenons les `r length(pirho_unlist)` collections et
|
||||
les structures suivantes:
|
||||
|
||||
```{r pirho_meso_plot, echo = FALSE, message=FALSE, fig.cap=paste(names(pirho_unlist), rep("- pirho",length(pirho_unlist))), results="asis", warning=FALSE}
|
||||
meso_print(pirho_unlist)
|
||||
```
|
||||
|
||||
Et voici donc les valeurs numériques pour les $\alpha$ (paramètres de connectivité).
|
||||
|
||||
```{r pirho_alpha, echo = FALSE, results="asis", warning=FALSE}
|
||||
alpha_print(pirho_unlist)
|
||||
```
|
||||
|
||||
### Répartition dans les clusters selon la taxonomie
|
||||
```{r pirho_taxonomy, echo = FALSE, warning=FALSE}
|
||||
pirho_taxonomy <- taxonomy_in_clusters(pirho_unlist)
|
||||
|
||||
pirho_taxonomy_long <- map_dfr(pirho_taxonomy,
|
||||
function(collection) {
|
||||
map_dfr(
|
||||
c("insects", "plants"),
|
||||
function(group) {
|
||||
get_formatted_data(collection, group)
|
||||
}
|
||||
)
|
||||
},
|
||||
.id = "Collection"
|
||||
)
|
||||
```
|
||||
|
||||
```{r pirho_plot_taxonomy_pollinators, echo = FALSE, message = FALSE, fig.cap = 'Pollinators repartition for the pirho model regarding taxonomy', warning=FALSE}
|
||||
# Pollinators
|
||||
taxonomy_plot(
|
||||
data = pirho_taxonomy_long,
|
||||
insects_or_plants = "insects",
|
||||
stack_or_fill = "stack", model = "pirho"
|
||||
)
|
||||
taxonomy_plot(
|
||||
data = pirho_taxonomy_long,
|
||||
insects_or_plants = "insects",
|
||||
stack_or_fill = "fill", model = "pirho"
|
||||
)
|
||||
```
|
||||
|
||||
```{r pirho_plot_taxonomy_plants, echo = FALSE, message = FALSE,fig.cap = 'Plants repartition for the pirho model regarding taxonomy', warning=FALSE}
|
||||
taxonomy_plot(
|
||||
data = pirho_taxonomy_long,
|
||||
insects_or_plants = "plants",
|
||||
stack_or_fill = "stack", model = "pirho"
|
||||
)
|
||||
taxonomy_plot(
|
||||
data = pirho_taxonomy_long,
|
||||
insects_or_plants = "plants",
|
||||
stack_or_fill = "fill", model = "pirho"
|
||||
)
|
||||
```
|
||||
1031
Rcodes/real_data/presentation_dore.tex
Normal file
1031
Rcodes/real_data/presentation_dore.tex
Normal file
File diff suppressed because it is too large
Load diff
52
Rcodes/real_data/references.bib
Normal file
52
Rcodes/real_data/references.bib
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
@misc{anakokDisentanglingStructureEcological2022,
|
||||
title = {Disentangling the Structure of Ecological Bipartite Networks from Observation Processes},
|
||||
author = {Anakok, Emre and Barbillon, Pierre and Fontaine, Colin and Thebault, Elisa},
|
||||
year = {2022},
|
||||
month = nov,
|
||||
number = {arXiv:2211.16364},
|
||||
eprint = {2211.16364},
|
||||
primaryclass = {stat},
|
||||
publisher = {{arXiv}},
|
||||
urldate = {2023-06-14},
|
||||
abstract = {The structure of a bipartite interaction network can be described by providing a clustering for each of the two types of nodes. Such clusterings are outputted by fitting a Latent Block Model (LBM) on an observed network that comes from a sampling of species interactions in the field. However, the sampling is limited and possibly uneven. This may jeopardize the fit of the LBM and then the description of the structure of the network by detecting structures which result from the sampling and not from actual underlying ecological phenomena. If the observed interaction network consists of a weighted bipartite network where the number of observed interactions between two species is available, the sampling efforts for all species can be estimated and used to correct the LBM fit. We propose to combine an observation model that accounts for sampling and an LBM for describing the structure of underlying possible ecological interactions. We develop an original inference procedure for this model, the efficiency of which is demonstrated in simulation studies. The practical interest in ecology of our model is highlighted on a large dataset of plant-pollinator network.},
|
||||
archiveprefix = {arxiv},
|
||||
langid = {english},
|
||||
keywords = {Statistics - Methodology},
|
||||
file = {/home/polarolouis/Zotero/storage/LQ3FINZG/Anakok et al. - 2022 - Disentangling the structure of ecological bipartit.pdf}
|
||||
}
|
||||
|
||||
@article{celisseConsistencyMaximumlikelihoodVariational2012,
|
||||
title = {Consistency of Maximum-Likelihood and Variational Estimators in the Stochastic Block Model},
|
||||
author = {Celisse, Alain and Daudin, Jean-Jacques and Pierre, Laurent},
|
||||
year = {2012},
|
||||
month = jan,
|
||||
journal = {Electronic Journal of Statistics},
|
||||
volume = {6},
|
||||
number = {none},
|
||||
pages = {1847--1899},
|
||||
publisher = {{Institute of Mathematical Statistics and Bernoulli Society}},
|
||||
issn = {1935-7524, 1935-7524},
|
||||
doi = {10.1214/12-EJS729},
|
||||
urldate = {2023-06-06},
|
||||
abstract = {The stochastic block model (SBM) is a probabilistic model designed to describe heterogeneous directed and undirected graphs. In this paper, we address the asymptotic inference in SBM by use of maximum-likelihood and variational approaches. The identifiability of SBM is proved while asymptotic properties of maximum-likelihood and variational estimators are derived. In particular, the consistency of these estimators is settled for the probability of an edge between two vertices (and for the group proportions at the price of an additional assumption), which is to the best of our knowledge the first result of this type for variational estimators in random graphs.},
|
||||
keywords = {62E17,62G05,62G20,62H30,Concentration inequalities,consistency,maximum likelihood estimators,Random graphs,Stochastic block model,variational estimators},
|
||||
file = {/home/polarolouis/Zotero/storage/JNWRIYKG/celisse2012.pdf.pdf;/home/polarolouis/Zotero/storage/XG463B5I/Celisse et al. - 2012 - Consistency of maximum-likelihood and variational .pdf}
|
||||
}
|
||||
|
||||
@misc{chabert-liddellLearningCommonStructures2023,
|
||||
type = {Article},
|
||||
title = {Learning Common Structures in a Collection of Networks. {{An}} Application to Food Webs},
|
||||
author = {{Chabert-Liddell}, Saint-Clair and Barbillon, Pierre and Donnet, Sophie},
|
||||
year = {2023},
|
||||
month = mar,
|
||||
number = {arXiv:2206.00560},
|
||||
eprint = {2206.00560},
|
||||
primaryclass = {stat},
|
||||
publisher = {{arXiv}},
|
||||
doi = {10.48550/arXiv.2206.00560},
|
||||
urldate = {2023-05-22},
|
||||
abstract = {Let a collection of networks represent interactions within several (social or ecological) systems. We pursue two objectives: identifying similarities in the topological structures that are held in common between the networks and clustering the collection into sub-collections of structurally homogeneous networks. We tackle these two questions with a probabilistic model based approach. We propose an extension of the Stochastic Block Model (SBM) adapted to the joint modeling of a collection of networks. The networks in the collection are assumed to be independent realizations of SBMs. The common connectivity structure is imposed through the equality of some parameters. The model parameters are estimated with a variational Expectation-Maximization (EM) algorithm. We derive an ad-hoc penalized likelihood criterion to select the number of blocks and to assess the adequacy of the consensus found between the structures of the different networks. This same criterion can also be used to cluster networks on the basis of their connectivity structure. It thus provides a partition of the collection into subsets of structurally homogeneous networks. The relevance of our proposition is assessed on two collections of ecological networks. First, an application to three stream food webs reveals the homogeneity of their structures and the correspondence between groups of species in different ecosystems playing equivalent ecological roles. Moreover, the joint analysis allows a finer analysis of the structure of smaller networks. Second, we cluster 67 food webs according to their connectivity structures and demonstrate that five mesoscale structures are sufficient to describe this collection.},
|
||||
archiveprefix = {arxiv},
|
||||
keywords = {Statistics - Applications,Statistics - Methodology},
|
||||
file = {/home/polarolouis/Zotero/storage/M74TXGCF/Chabert-Liddell et al. - 2023 - Learning common structures in a collection of netw.pdf;/home/polarolouis/Zotero/storage/A35M8KNP/2206.html}
|
||||
}
|
||||
197
Rcodes/real_data/temporary_plot.R
Normal file
197
Rcodes/real_data/temporary_plot.R
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
#' The function to plot the fitBipartite objects
|
||||
#' @importFrom patchwork
|
||||
#' @importFrom reshape2
|
||||
#' @importFrom purrr
|
||||
plotFitBipartite <- function(model, type = "graphon", oRow = NULL, oCol = NULL, mixture = FALSE, net_id = NULL, ...) {
|
||||
# The below order use mean over all networks to have a consistent display
|
||||
if (is.null(oRow)) {
|
||||
if (model$Q[2] == 1) {
|
||||
mean_rho <- 1
|
||||
} else {
|
||||
mean_rho <- matrixStats::rowMeans2(sapply(model$pi, function(pi) pi[[2]]))
|
||||
}
|
||||
oRow <- order(model$alpha %*% mean_rho, decreasing = TRUE)
|
||||
}
|
||||
if (is.null(oCol)) {
|
||||
if (model$Q[1] == 1) {
|
||||
mean_pi <- 1
|
||||
} else {
|
||||
mean_pi <- matrixStats::rowMeans2(sapply(model$pi, function(pi) pi[[1]]))
|
||||
}
|
||||
oCol <- order(mean_pi %*% model$alpha, decreasing = TRUE)
|
||||
}
|
||||
p <- switch(type,
|
||||
graphon = {
|
||||
(model$alpha[oRow, oCol] * mean(model$delta)) %>%
|
||||
t() %>%
|
||||
reshape2::melt() %>%
|
||||
dplyr::mutate(
|
||||
xmin = rep(c(0, cumsum(model$pi[[net_id]][[2]][oCol][1:(model$Q[2] - 1)])), model$Q[1]),
|
||||
ymin = rep(c(0, cumsum(model$pi[[net_id]][[1]][oRow][1:(model$Q[1] - 1)])), each = model$Q[2]),
|
||||
xmax = rep(cumsum(model$pi[[net_id]][[2]][oCol]), model$Q[1]),
|
||||
ymax = rep(cumsum(model$pi[[net_id]][[1]][oRow]), each = model$Q[2])
|
||||
) %>%
|
||||
ggplot2::ggplot(ggplot2::aes(
|
||||
xmin = xmin, ymin = ymin,
|
||||
xmax = xmax, ymax = ymax, fill = value
|
||||
)) +
|
||||
ggplot2::geom_rect() +
|
||||
ggplot2::scale_fill_gradient2("alpha", low = "white", mid = "red", midpoint = 1) +
|
||||
ggplot2::geom_hline(yintercept = cumsum(model$pi[[net_id]][[1]][oRow][1:(model$Q[1] - 1)]), size = .2) +
|
||||
ggplot2::geom_vline(xintercept = cumsum(model$pi[[net_id]][[2]][oCol][1:(model$Q[2] - 1)]), size = .2) +
|
||||
ggplot2::scale_y_reverse() +
|
||||
ggplot2::theme_bw(base_size = 15, base_rect_size = 1, base_line_size = 1) +
|
||||
ggplot2::xlab("Column Blocks") +
|
||||
ggplot2::ylab("Row Blocks") +
|
||||
ggplot2::coord_equal(expand = FALSE)
|
||||
},
|
||||
meso = {
|
||||
p_alpha <- model$alpha[oRow, oCol] %>%
|
||||
t() %>%
|
||||
reshape2::melt() %>%
|
||||
ggplot2::ggplot(ggplot2::aes(x = Var1, y = Var2, fill = value)) +
|
||||
ggplot2::geom_tile() +
|
||||
ggplot2::scale_fill_gradient2("alpha", low = "white", high = "red") +
|
||||
ggplot2::geom_hline(yintercept = seq(model$Q[1]) + .5) +
|
||||
ggplot2::geom_vline(xintercept = seq(model$Q[2]) + .5) +
|
||||
ggplot2::scale_y_reverse() +
|
||||
ggplot2::theme_bw(base_size = 15, base_rect_size = 1, base_line_size = 1) +
|
||||
ggplot2::xlab("") +
|
||||
ggplot2::ylab("") +
|
||||
ggplot2::coord_fixed(expand = FALSE)
|
||||
# scale_y_reverse()
|
||||
if (model$free_density) {
|
||||
xl <- paste(round(model$delta, 1))
|
||||
} else {
|
||||
xl <- ""
|
||||
}
|
||||
df_pi <- purrr::map_dfc(
|
||||
seq_along(model$net_id),
|
||||
function(m) setNames(data.frame(model$pim[[m]][[1]][oRow]), m)
|
||||
)
|
||||
df_rho <- purrr::map_dfc(
|
||||
seq_along(model$net_id),
|
||||
function(m) setNames(data.frame(model$pim[[m]][[2]][oCol]), m)
|
||||
)
|
||||
# names(df_pi) <- model$net_id
|
||||
if (mixture) {
|
||||
p_pi <-
|
||||
df_pi %>%
|
||||
# rename() %>%
|
||||
dplyr::mutate(q = seq(model$Q[1])) %>%
|
||||
tidyr::pivot_longer(cols = -c(q)) %>%
|
||||
mutate(Proportion = value) %>%
|
||||
ggplot2::ggplot(ggplot2::aes(
|
||||
fill = as.factor(q), y = name,
|
||||
x = Proportion
|
||||
)) +
|
||||
ggplot2::geom_col() +
|
||||
ggplot2::coord_flip(expand = FALSE) +
|
||||
ggplot2::scale_fill_brewer("Row block",
|
||||
type = "qual", palette = "Paired",
|
||||
direction = -1
|
||||
) +
|
||||
ggplot2::guides(fill = ggplot2::guide_legend(
|
||||
ncol = model$Q[1] %/% 3 + 1,
|
||||
byrow = TRUE
|
||||
)) +
|
||||
ggplot2::ylab("") +
|
||||
ggplot2::ylab(xl) +
|
||||
ggplot2::theme_bw(base_size = 15)
|
||||
p_rho <- df_rho %>%
|
||||
# rename() %>%
|
||||
dplyr::mutate(q = seq(model$Q[2])) %>%
|
||||
tidyr::pivot_longer(cols = -c(q)) %>%
|
||||
mutate(Proportion = value) %>%
|
||||
ggplot2::ggplot(ggplot2::aes(
|
||||
fill = as.factor(q), y = name,
|
||||
x = Proportion
|
||||
)) +
|
||||
ggplot2::geom_col() +
|
||||
# ggplot2::coord_flip(expand = FALSE) +
|
||||
ggplot2::scale_fill_brewer("Column block",
|
||||
type = "qual", palette = "Set2",
|
||||
direction = -1
|
||||
) +
|
||||
ggplot2::guides(fill = ggplot2::guide_legend(
|
||||
ncol = model$Q[2] %/% 3 + 1,
|
||||
byrow = TRUE
|
||||
)) +
|
||||
ggplot2::ylab("") +
|
||||
ggplot2::ylab(xl) +
|
||||
ggplot2::theme_bw(base_size = 15)
|
||||
# Merging the plots with patchwork
|
||||
mixture_layout <- "
|
||||
##CCCC
|
||||
##CCCC
|
||||
RRAAAA
|
||||
RRAAAA
|
||||
RRAAAA
|
||||
"
|
||||
p_alpha <- patchwork::wrap_plots(
|
||||
R = p_pi, C = p_rho, A = p_alpha,
|
||||
design = mixture_layout
|
||||
) +
|
||||
patchwork::plot_layout(
|
||||
guides = "collect",
|
||||
design = mixture_layout
|
||||
)
|
||||
}
|
||||
return(p_alpha)
|
||||
},
|
||||
"block" = {
|
||||
as.matrix(model$A[[net_id]])[
|
||||
order(model$Z[[net_id]][[1]]),
|
||||
order(model$Z[[net_id]][[2]])
|
||||
] %>%
|
||||
reshape2::melt() %>%
|
||||
ggplot2::ggplot(ggplot2::aes(x = Var2, y = rev(Var1), fill = value)) +
|
||||
ggplot2::geom_tile(show.legend = FALSE) +
|
||||
ggplot2::geom_hline(
|
||||
yintercept = cumsum(tabulate(model$Z[[net_id]][[1]])[model$Q[1]:2]) + .5,
|
||||
col = "red", size = .5
|
||||
) +
|
||||
ggplot2::geom_vline(
|
||||
xintercept = cumsum(tabulate(model$Z[[net_id]][[2]])[1:(model$Q[2] - 1)]) + .5,
|
||||
col = "red", size = .5
|
||||
) +
|
||||
ggplot2::scale_fill_gradient(low = "white", high = "black", na.value = "transparent") +
|
||||
ggplot2::ylab("") +
|
||||
ggplot2::xlab(model$net_id[net_id]) +
|
||||
ggplot2::scale_x_discrete(
|
||||
breaks = ""
|
||||
) +
|
||||
# ggplot2::scale_y_reverse() +
|
||||
ggplot2::scale_y_discrete(
|
||||
breaks = "",
|
||||
guide = ggplot2::guide_axis(angle = 0)
|
||||
) +
|
||||
ggplot2::coord_equal(expand = FALSE) +
|
||||
ggplot2::theme_bw(base_size = 15) +
|
||||
ggplot2::theme(axis.ticks = ggplot2::element_blank())
|
||||
}
|
||||
)
|
||||
return(p)
|
||||
}
|
||||
|
||||
#' Plot matrix summaries of the collection mesoscale structure
|
||||
#'
|
||||
#' @param x a fitBipartiteSBMPop object.
|
||||
#' @param type The type of the plot. Could be "graphon", "meso" or "block".
|
||||
#' @param ord A reordering of the blocks.
|
||||
#' @param mixture Should the block proportions of each network be plotted as
|
||||
#' well?
|
||||
#' @param net_id Use to plot only on network in "graphon" view.
|
||||
#' @param ... Further argument to be passed
|
||||
#' @return A plot, a ggplot2 object.
|
||||
#' @export
|
||||
#'
|
||||
#' @examples
|
||||
plot.fitBipartiteSBMPop <- function(x, type = "graphon", oRow = NULL, oCol = NULL, mixture = FALSE, net_id = 1, ...) {
|
||||
stopifnot(inherits(x, "fitBipartiteSBMPop"))
|
||||
p <- plotFitBipartite(x,
|
||||
type = type, oRow = oRow, oCol = oCol, mixture = mixture,
|
||||
net_id = net_id, ...
|
||||
)
|
||||
p
|
||||
}
|
||||
178
Rcodes/simulation/NA_robustness_analyse.Rmd
Normal file
178
Rcodes/simulation/NA_robustness_analyse.Rmd
Normal file
|
|
@ -0,0 +1,178 @@
|
|||
---
|
||||
title: "Analyzing the capacity of the colBiSBM to recover structure for missing data from other networks"
|
||||
output:
|
||||
html_document:
|
||||
toc: true
|
||||
theme: journal
|
||||
pdf_document:
|
||||
keep_tex: true
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = TRUE)
|
||||
```
|
||||
|
||||
```{r required_libs, echo = FALSE, include=FALSE}
|
||||
require("ggplot2")
|
||||
require("tidyverse")
|
||||
```
|
||||
|
||||
```{r import_data, echo=FALSE, include=FALSE}
|
||||
NA_robustness_raw <- readRDS("simulation/data/NA_robustness_results-alpha_0.7, 0.4, 0.3, 0.4, 0.2, 0.05, 0.3, 0.05, 0.05-reps-10-14-06-23_17-41.Rds")
|
||||
NA_robustness_df <- NA_robustness_raw %>%
|
||||
mutate(
|
||||
auc_diff = auc_colBiSBM - auc_LBM,
|
||||
ari_row_diff = NA_net_ari_row - LBM_ari_row,
|
||||
ari_col_diff = NA_net_ari_col - LBM_ari_col
|
||||
) %>%
|
||||
group_by(prop_NAs, model) %>%
|
||||
summarise(
|
||||
mean_auc_diff = mean(auc_diff),
|
||||
sd_auc_diff = sd(auc_diff),
|
||||
mean_ari_row_diff = mean(ari_row_diff),
|
||||
sd_ari_row_diff = sd(ari_row_diff),
|
||||
mean_ari_col_diff = mean(ari_col_diff),
|
||||
sd_ari_col_diff = sd(ari_col_diff),
|
||||
mean_LBM_ari_row = mean(LBM_ari_row),
|
||||
sd_LBM_ari_row = sd(LBM_ari_row),
|
||||
mean_LBM_ari_col = mean(LBM_ari_col),
|
||||
sd_LBM_ari_col = sd(LBM_ari_col),
|
||||
mean_NA_net_ari_row = mean(NA_net_ari_row),
|
||||
sd_NA_net_ari_row = sd(NA_net_ari_row),
|
||||
mean_NA_net_ari_col = mean(NA_net_ari_col),
|
||||
sd_NA_net_ari_col = sd(NA_net_ari_col),
|
||||
mean_elapsed_secs = mean(elapsed_secs),
|
||||
sd_elapsed_secs = sd(elapsed_secs)
|
||||
) %>%
|
||||
ungroup()
|
||||
```
|
||||
|
||||
```{r useful_function, echo = FALSE}
|
||||
write_matex2 <- function(x) {
|
||||
if (!is.matrix(x)) {
|
||||
x <- matrix(x)
|
||||
}
|
||||
begin <- "\\begin{bmatrix}"
|
||||
end <- "\\end{bmatrix}"
|
||||
X <-
|
||||
apply(x, 1, function(x) {
|
||||
paste(
|
||||
paste(x, collapse = "&"),
|
||||
"\\\\"
|
||||
)
|
||||
})
|
||||
paste(c(begin, X, end), collapse = "")
|
||||
}
|
||||
```
|
||||
|
||||
# Simulation context
|
||||
|
||||
The idea is to benchmark the capacity of the models when NAs are in the data.
|
||||
|
||||
To do this, we chose the below structure:
|
||||
|
||||
```{r simulation_parameters, echo = FALSE}
|
||||
eps <- 0.05
|
||||
|
||||
M <- 3
|
||||
|
||||
# Defining parameters
|
||||
nr <- 100
|
||||
nc <- 150
|
||||
pir <- c(0.5, 0.3, 0.2)
|
||||
pic <- c(0.5, 0.3, 0.2)
|
||||
alpha <- matrix(c(
|
||||
0.7, 0.4, 0.3,
|
||||
0.4, 0.2, eps,
|
||||
0.3, eps, eps
|
||||
), byrow = TRUE, nrow = length(pir), ncol = length(pic))
|
||||
```
|
||||
|
||||
$$M = `r M`, n_r = `r nr`, n_c = `r nc` \\ \alpha = `r write_matex2(alpha)`
|
||||
\\ \pi = `r write_matex2(pir)` \rho = `r write_matex2(pic)`$$
|
||||
|
||||
With $M$ the number of networks, $n_r$ the number of nodes in row of the incidence
|
||||
matrix, $n_c$ the number of nodes in column, $\alpha$ the connectivity
|
||||
parameters between the row and column clusters. $\pi$ and $\rho$ are
|
||||
the proportion of nodes in the row and columns clusters.
|
||||
|
||||
And set some randomly chosen interactions to NA. The below plots will show the
|
||||
different quality indicators in function of proportion of NAs in the first of
|
||||
the 3 networks.
|
||||
|
||||
# AUC in function of the proportion of NAs
|
||||
|
||||
```{r auc_plots, echo = FALSE}
|
||||
auc_plot <- NA_robustness_df %>% ggplot() +
|
||||
geom_ribbon(aes(ymin = mean_auc_diff - sd_auc_diff, ymax = mean_auc_diff + sd_auc_diff, x = prop_NAs, fill = model), alpha = 0.1) +
|
||||
geom_line(aes(x = prop_NAs, y = mean_auc_diff, color = model)) +
|
||||
geom_point(aes(x = prop_NAs, y = mean_auc_diff, color = model)) +
|
||||
xlab("NA proportion") +
|
||||
ylab("AUC difference (colBiSBM - LBM)") +
|
||||
scale_x_continuous(breaks = seq(0, 0.9, 0.1))
|
||||
|
||||
|
||||
auc_plot
|
||||
```
|
||||
|
||||
```{r auc_, echo = FALSE}
|
||||
auc_plot <- NA_robustness_df %>% ggplot() +
|
||||
geom_ribbon(aes(ymin = mean_auc_diff - sd_auc_diff, ymax = mean_auc_diff + sd_auc_diff, x = prop_NAs, fill = model), alpha = 0.1) +
|
||||
geom_line(aes(x = prop_NAs, y = mean_auc_diff, color = model)) +
|
||||
geom_point(aes(x = prop_NAs, y = mean_auc_diff, color = model)) +
|
||||
xlab("NA proportion") +
|
||||
ylab("AUC difference (colBiSBM - LBM)") +
|
||||
scale_x_continuous(breaks = seq(0, 0.9, 0.1))
|
||||
|
||||
|
||||
auc_plot
|
||||
```
|
||||
<!-- FAUX
|
||||
At $0$, there is no NAs in the $1^{st}$ network so the information is completely
|
||||
retrieved.
|
||||
|
||||
At $0.1$, the AUC is degraded and falls around 0.85 for the *iid, pi and rho*
|
||||
models. The *pirho* model seems to perform slightly better, but it's confidence interval
|
||||
intersects the others.
|
||||
|
||||
For the next values, two "behaviours" can be observed :
|
||||
- the *iid* decreases and seems to stabilize until 0.8.
|
||||
- the *pi, rho and pirho* seem to remain stable until 0.7, from 0.8 they go down.
|
||||
But the *pirho* performs overall the best, staying around 0.86 and with $90\%$
|
||||
NAs in ther first network it gives an AUC of 0.83 !
|
||||
|
||||
The *pi* and *rho* models perform the same (their confidence intervals overlap),
|
||||
and they give an AUC of around 0.83 between $20\%$ to $80\%$ of NAs. The *rho*
|
||||
ends at 0.8 AUC, where the *pi* ends at 0.76.
|
||||
|
||||
The 4 models maintain an AUC over 0.75 for $90%$ -->
|
||||
|
||||
# ARI in function of the proportion of NAs
|
||||
|
||||
```{r ARI_row_plot, echo = FALSE, fig.cap="Difference of ARI for the row clusterings"}
|
||||
ari_row_plot <- NA_robustness_df %>% ggplot() +
|
||||
# ylim(-1, 1) +
|
||||
geom_ribbon(aes(ymin = mean_ari_row_diff - sd_ari_row_diff, ymax = mean_ari_row_diff + sd_ari_row_diff, x = prop_NAs, fill = model), alpha = 0.25) +
|
||||
geom_line(aes(x = prop_NAs, y = mean_ari_row_diff, color = model)) +
|
||||
geom_point(aes(x = prop_NAs, y = mean_ari_row_diff, color = model)) +
|
||||
xlab("NA proportion") +
|
||||
ylab("ARI difference") +
|
||||
ggtitle("ARI on the row clustering, difference (colBiSBM - LBM)") +
|
||||
scale_x_continuous(breaks = seq(0, 0.9, 0.1))
|
||||
|
||||
ari_row_plot
|
||||
```
|
||||
|
||||
```{r ARI_col_plot, echo = FALSE, fig.cap="Difference of ARI for the columns clusterings"}
|
||||
ari_col_plot <- NA_robustness_df %>% ggplot() +
|
||||
# ylim(-1, 1) +
|
||||
geom_ribbon(aes(ymin = mean_ari_col_diff - sd_ari_col_diff, ymax = mean_ari_col_diff + sd_ari_col_diff, x = prop_NAs, fill = model), alpha = 0.25) +
|
||||
geom_line(aes(x = prop_NAs, y = mean_ari_col_diff, color = model)) +
|
||||
geom_point(aes(x = prop_NAs, y = mean_ari_col_diff, color = model)) +
|
||||
xlab("NA proportion") +
|
||||
ylab("ARI difference") +
|
||||
ggtitle("ARI on the column clustering, difference (colBiSBM - LBM)") +
|
||||
scale_x_continuous(breaks = seq(0, 0.9, 0.1))
|
||||
|
||||
ari_col_plot
|
||||
```
|
||||
110
Rcodes/simulation/NA_robustness_analyse.tex
Normal file
110
Rcodes/simulation/NA_robustness_analyse.tex
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
% Options for packages loaded elsewhere
|
||||
\PassOptionsToPackage{unicode}{hyperref}
|
||||
\PassOptionsToPackage{hyphens}{url}
|
||||
%
|
||||
\documentclass[
|
||||
]{article}
|
||||
\usepackage{lmodern}
|
||||
\usepackage{amssymb,amsmath}
|
||||
\usepackage{ifxetex,ifluatex}
|
||||
\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage[utf8]{inputenc}
|
||||
\usepackage{textcomp} % provide euro and other symbols
|
||||
\else % if luatex or xetex
|
||||
\usepackage{unicode-math}
|
||||
\defaultfontfeatures{Scale=MatchLowercase}
|
||||
\defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1}
|
||||
\fi
|
||||
% Use upquote if available, for straight quotes in verbatim environments
|
||||
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
|
||||
\IfFileExists{microtype.sty}{% use microtype if available
|
||||
\usepackage[]{microtype}
|
||||
\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
|
||||
}{}
|
||||
\makeatletter
|
||||
\@ifundefined{KOMAClassName}{% if non-KOMA class
|
||||
\IfFileExists{parskip.sty}{%
|
||||
\usepackage{parskip}
|
||||
}{% else
|
||||
\setlength{\parindent}{0pt}
|
||||
\setlength{\parskip}{6pt plus 2pt minus 1pt}}
|
||||
}{% if KOMA class
|
||||
\KOMAoptions{parskip=half}}
|
||||
\makeatother
|
||||
\usepackage{xcolor}
|
||||
\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available
|
||||
\IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}}
|
||||
\hypersetup{
|
||||
pdftitle={Analyzing the capacity of the colBiSBM to recover structure for missing data from other networks},
|
||||
hidelinks,
|
||||
pdfcreator={LaTeX via pandoc}}
|
||||
\urlstyle{same} % disable monospaced font for URLs
|
||||
\usepackage[margin=1in]{geometry}
|
||||
\usepackage{graphicx}
|
||||
\makeatletter
|
||||
\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
|
||||
\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
|
||||
\makeatother
|
||||
% Scale images if necessary, so that they will not overflow the page
|
||||
% margins by default, and it is still possible to overwrite the defaults
|
||||
% using explicit options in \includegraphics[width, height, ...]{}
|
||||
\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
|
||||
% Set default figure placement to htbp
|
||||
\makeatletter
|
||||
\def\fps@figure{htbp}
|
||||
\makeatother
|
||||
\setlength{\emergencystretch}{3em} % prevent overfull lines
|
||||
\providecommand{\tightlist}{%
|
||||
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
|
||||
\setcounter{secnumdepth}{-\maxdimen} % remove section numbering
|
||||
|
||||
\title{Analyzing the capacity of the colBiSBM to recover structure for
|
||||
missing data from other networks}
|
||||
\author{}
|
||||
\date{\vspace{-2.5em}}
|
||||
|
||||
\begin{document}
|
||||
\maketitle
|
||||
|
||||
\begin{verbatim}
|
||||
## `summarise()` has grouped output by 'prop_NAs'. You can override using the
|
||||
## `.groups` argument.
|
||||
\end{verbatim}
|
||||
|
||||
\hypertarget{simulation-context}{%
|
||||
\section{Simulation context}\label{simulation-context}}
|
||||
|
||||
The idea is to benchmark the capacity of the models when NAs are in the
|
||||
data.
|
||||
|
||||
To do this, whe choose the below structure: ! PARAMETERS OF THE
|
||||
SIMULATION !
|
||||
|
||||
And set some randomly chosen interactions to NA. The below plots will
|
||||
show the different quality indicators in function of proportion of NAs
|
||||
in the first of the 3 networks.
|
||||
|
||||
\hypertarget{auc-in-function-of-the-proportion-of-nas}{%
|
||||
\section{AUC in function of the proportion of
|
||||
NAs}\label{auc-in-function-of-the-proportion-of-nas}}
|
||||
|
||||
\includegraphics{NA_robustness_analyse_files/figure-latex/auc_plots-1.pdf}
|
||||
|
||||
\hypertarget{ari-in-function-of-the-proportion-of-nas}{%
|
||||
\section{ARI in function of the proportion of
|
||||
NAs}\label{ari-in-function-of-the-proportion-of-nas}}
|
||||
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics{NA_robustness_analyse_files/figure-latex/ARI_row_plot-1.pdf}
|
||||
\caption{Difference of ARI for the row clusterings}
|
||||
\end{figure}
|
||||
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics{NA_robustness_analyse_files/figure-latex/ARI_col_plot-1.pdf}
|
||||
\caption{Difference of ARI for the columns clusterings}
|
||||
\end{figure}
|
||||
|
||||
\end{document}
|
||||
153
Rcodes/simulation/NA_robustness_check.R
Normal file
153
Rcodes/simulation/NA_robustness_check.R
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
devtools::load_all()
|
||||
require("sbm")
|
||||
require("pROC")
|
||||
|
||||
set.seed(1234)
|
||||
|
||||
eps <- 0.05
|
||||
|
||||
M <- 3
|
||||
|
||||
# Defining parameters
|
||||
nr <- 100
|
||||
nc <- 150
|
||||
pir <- c(0.5, 0.3, 0.2)
|
||||
pic <- c(0.5, 0.3, 0.2)
|
||||
alpha <- matrix(c(
|
||||
0.7, 0.4, 0.3,
|
||||
0.4, 0.2, eps,
|
||||
0.3, eps, eps
|
||||
), byrow = TRUE, nrow = length(pir), ncol = length(pic))
|
||||
max_repetition <- 10
|
||||
|
||||
# Collections
|
||||
collections <- list(
|
||||
"iid" = generate_bipartite_collection(nr, nc,
|
||||
pir, pic,
|
||||
alpha, M,
|
||||
model = "iid",
|
||||
return_memberships = TRUE
|
||||
),
|
||||
"pi" = generate_bipartite_collection(nr, nc,
|
||||
pir, pic,
|
||||
alpha, M,
|
||||
model = "pi",
|
||||
return_memberships = TRUE
|
||||
),
|
||||
"rho" = generate_bipartite_collection(nr, nc,
|
||||
pir, pic,
|
||||
alpha, M,
|
||||
model = "rho",
|
||||
return_memberships = TRUE
|
||||
),
|
||||
"pirho" = generate_bipartite_collection(nr, nc,
|
||||
pir, pic,
|
||||
alpha, M,
|
||||
model = "pirho",
|
||||
return_memberships = TRUE
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
conditions <- expand.grid(
|
||||
prop_NAs = seq(from = 0, to = 0.9, by = 0.1),
|
||||
model = c("iid", "pi", "rho", "pirho"),
|
||||
repetition = seq.int(max_repetition)
|
||||
)
|
||||
|
||||
|
||||
|
||||
result_dataframe <- do.call("rbind", bettermc::mclapply(seq_len(nrow(conditions)), function(current) {
|
||||
# Looping over conditions
|
||||
prop_NAs <- conditions[current, ]$prop_NAs
|
||||
model <- as.character(conditions[current, ]$model)
|
||||
bipartite_collection <- collections[[model]]
|
||||
|
||||
# This is a list of the M incidence matrices
|
||||
bipartite_collection_incidence <- lapply(seq.int(M), function(m) {
|
||||
bipartite_collection[[m]]$incidence_matrix
|
||||
})
|
||||
|
||||
# Sampling values to replace by NAs
|
||||
NAs_index <- sample(
|
||||
seq_len(length(bipartite_collection_incidence[[1]])),
|
||||
floor(prop_NAs * length(bipartite_collection_incidence[[1]]))
|
||||
)
|
||||
|
||||
real_val_NAs <- bipartite_collection_incidence[[1]][NAs_index]
|
||||
bipartite_collection_incidence[[1]][NAs_index] <- NA
|
||||
NAs_coordinates <- which(is.na(bipartite_collection_incidence[[1]]),
|
||||
arr.ind = TRUE
|
||||
)
|
||||
|
||||
Z <- lapply(seq.int(M), function(m) {
|
||||
list(
|
||||
bipartite_collection[[m]]$row_blockmemberships,
|
||||
bipartite_collection[[m]]$col_blockmemberships
|
||||
)
|
||||
})
|
||||
|
||||
start_time <- Sys.time()
|
||||
mybisbmpop <- estimate_colBiSBM(
|
||||
netlist = bipartite_collection_incidence, colsbm_model = model,
|
||||
nb_run = 1,
|
||||
global_opts = list(
|
||||
nb_cores = parallel::detectCores() - 1, verbosity = 0
|
||||
), silent_parallelization = TRUE
|
||||
)
|
||||
stop_time <- Sys.time()
|
||||
|
||||
baseline_LBM <- estimate_colBiSBM(
|
||||
netlist = bipartite_collection_incidence[[1]], colsbm_model = model,
|
||||
nb_run = 1,
|
||||
global_opts = list(
|
||||
nb_cores = parallel::detectCores() - 1, verbosity = 0
|
||||
), silent_parallelization = TRUE
|
||||
)
|
||||
|
||||
# Predicted links
|
||||
X_hat_LBM <- baseline_LBM$best_fit$tau[[1]][[1]] %*% baseline_LBM$best_fit$alpha %*% t(baseline_LBM$best_fit$tau[[1]][[2]])
|
||||
X_hat <- mybisbmpop$best_fit$tau[[1]][[1]] %*% mybisbmpop$best_fit$alpha %*% t(mybisbmpop$best_fit$tau[[1]][[2]])
|
||||
|
||||
# Compute ROC and AUC
|
||||
auc_LBM <- auc(c(0, 1, real_val_NAs), c(0, 1, X_hat_LBM[NAs_index]))
|
||||
auc_colBiSBM <- auc(c(0, 1, real_val_NAs), c(0, 1, X_hat[NAs_index]))
|
||||
|
||||
# Computing ARI on the NAs
|
||||
return(data.frame(
|
||||
prop_NAs = prop_NAs,
|
||||
model = model,
|
||||
repetition = conditions[current, ]$repetition,
|
||||
auc_LBM = auc_LBM,
|
||||
auc_colBiSBM = auc_colBiSBM,
|
||||
LBM_ari_row = aricode::ARI(
|
||||
Z[[1]][[1]],
|
||||
baseline_LBM$best_fit$Z[[1]][[1]]
|
||||
),
|
||||
LBM_ari_col = aricode::ARI(
|
||||
Z[[1]][[2]],
|
||||
baseline_LBM$best_fit$Z[[1]][[2]]
|
||||
),
|
||||
NA_net_ari_row = aricode::ARI(
|
||||
Z[[1]][[1]],
|
||||
mybisbmpop$best_fit$Z[[1]][[1]]
|
||||
),
|
||||
NA_net_ari_col = aricode::ARI(
|
||||
Z[[1]][[2]],
|
||||
mybisbmpop$best_fit$Z[[1]][[2]]
|
||||
),
|
||||
elapsed_secs = difftime(stop_time, start_time, units = "sec")
|
||||
))
|
||||
},
|
||||
mc.cores = parallel::detectCores() - 1,
|
||||
mc.progress = TRUE
|
||||
))
|
||||
|
||||
saveRDS(
|
||||
result_dataframe,
|
||||
paste0(
|
||||
"simulation/data/",
|
||||
"NA_robustness_results-alpha_", toString(alpha),
|
||||
"-reps-", max_repetition, "-", format(Sys.time(), "%d-%m-%y_%H-%M"), ".Rds"
|
||||
)
|
||||
)
|
||||
29
Rcodes/simulation/analyze_results.R
Normal file
29
Rcodes/simulation/analyze_results.R
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
library("ggplot2")
|
||||
library("dplyr")
|
||||
|
||||
|
||||
filename <- "divergence_modular_to_nested_with_3_repetitions_29-03-23_13:55:42.Rds"
|
||||
filename <- paste0(getwd(), "/simulation/data/", filename)
|
||||
|
||||
current_data <- readRDS(filename)
|
||||
|
||||
current_data$results <- current_data$results %>% mutate(diff_BICL = BICL - sep_LBM_BICL)
|
||||
|
||||
current_data$results <- current_data$results %>%
|
||||
group_by(Current_M, Network_id, divergence) %>%
|
||||
summarize(mean_diff_BICL = mean(diff_BICL), sd_diff_BICL = sd(diff_BICL))
|
||||
|
||||
ggplot(current_data$results) +
|
||||
aes(x = divergence, y = mean_diff_BICL, group = factor(Current_M), col = factor(Current_M)) +
|
||||
geom_point() +
|
||||
geom_line() +
|
||||
geom_ribbon(
|
||||
aes(
|
||||
ymin = mean_diff_BICL - sd_diff_BICL,
|
||||
ymax = mean_diff_BICL + sd_diff_BICL,
|
||||
fill = factor(Current_M),
|
||||
col = NULL
|
||||
),
|
||||
alpha = .25
|
||||
) +
|
||||
geom_hline(yintercept = 0)
|
||||
115
Rcodes/simulation/compare_parallelization_levels.R
Normal file
115
Rcodes/simulation/compare_parallelization_levels.R
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
# Sourcing all necessary files
|
||||
require("sbm", quietly = T)
|
||||
require("dplyr", quietly = T)
|
||||
require("tictoc", quietly = T)
|
||||
require("ggplot2", quietly = T)
|
||||
|
||||
devtools::load_all(path = "R/")
|
||||
|
||||
eps <- 0.05
|
||||
M <- 2
|
||||
nr <- 100
|
||||
nc <- 250
|
||||
|
||||
pir1 <- c(0.2, 0, 0.8)
|
||||
pir2 <- c(0.4, 0.6, 0)
|
||||
|
||||
pic1 <- c(0.6, 0, 0.4)
|
||||
pic2 <- c(0.4, 0.6, 0)
|
||||
|
||||
Q <- c(length(pir1), length(pic1))
|
||||
|
||||
# Make a non common alpha structure
|
||||
|
||||
alpha <- matrix(
|
||||
c( # 12 2 1
|
||||
0.4, eps, eps, # 12
|
||||
eps, 0.5, 0, # 2
|
||||
eps, 0, 0.2 # 1
|
||||
),
|
||||
nrow = Q[1], ncol = Q[2], byrow = TRUE
|
||||
)
|
||||
|
||||
bipartite_collection <- list(
|
||||
generate_bipartite_network(nr, nc, pir1, pic1, alpha),
|
||||
generate_bipartite_network(nr, nc, pir2, pic2, alpha)
|
||||
)
|
||||
|
||||
# This is a list of the M incidence matrices
|
||||
bipartite_collection_incidence <- lapply(seq.int(M), function(m) {
|
||||
bipartite_collection[[m]]$incidence_matrix
|
||||
})
|
||||
|
||||
Z <- lapply(seq.int(M), function(m) {
|
||||
list(
|
||||
bipartite_collection[[m]]$row_clustering,
|
||||
bipartite_collection[[m]]$col_clustering
|
||||
)
|
||||
})
|
||||
|
||||
results <- data.frame(matrix(nrow = 0, ncol = 6))
|
||||
names(results) <- c(
|
||||
"repetition",
|
||||
"user_func_parallel",
|
||||
"exploration_parallel",
|
||||
"subexploration_parallel",
|
||||
"ari_sum",
|
||||
"time"
|
||||
)
|
||||
|
||||
repetition_number <- 3
|
||||
|
||||
conditions <- expand.grid(
|
||||
seq.int(3), c(TRUE),
|
||||
c(TRUE, FALSE), c(TRUE, FALSE)
|
||||
)
|
||||
|
||||
for (i in seq_len(nrow(conditions))) {
|
||||
cat(
|
||||
"\nCondition ", i, "/", nrow(conditions), "\n",
|
||||
"repetition:", conditions[i, 1],
|
||||
" -- user_func_parallel:", conditions[i, 2],
|
||||
"-- exploration_parallel:", conditions[i, 3],
|
||||
"-- subexploration_parallel:", conditions[i, 4],
|
||||
"\n"
|
||||
)
|
||||
t0 <- Sys.time()
|
||||
mybisbmpop <- estimate_colBiSBM(
|
||||
netlist = bipartite_collection_incidence,
|
||||
colsbm_model = "pirho",
|
||||
global_opts = list(
|
||||
nb_cores = parallel::detectCores() - 1,
|
||||
verbosity = 0,
|
||||
parallelization_vector = unlist(conditions[i,c(2,3,4)])
|
||||
)
|
||||
)
|
||||
t1 <- Sys.time()
|
||||
ari_sums <- sum(sapply(
|
||||
seq_along(mybisbmpop$best_fit$Z),
|
||||
function(m) {
|
||||
sum(
|
||||
aricode::ARI(
|
||||
Z[[m]][[1]],
|
||||
mybisbmpop$best_fit$Z[[m]][[1]]
|
||||
),
|
||||
aricode::ARI(
|
||||
Z[[m]][[2]],
|
||||
mybisbmpop$best_fit$Z[[m]][[2]]
|
||||
)
|
||||
)
|
||||
}
|
||||
))
|
||||
|
||||
results[nrow(results) + 1, ] <- c(
|
||||
conditions[i, 1],
|
||||
conditions[i, 2],
|
||||
conditions[i, 3],
|
||||
conditions[i, 4],
|
||||
ari_sums,
|
||||
t1 - t0
|
||||
)
|
||||
}
|
||||
|
||||
saveRDS(results,
|
||||
file = paste0("./simulation/data/parallel_levels_", Sys.time(), ".Rds")
|
||||
)
|
||||
BIN
Rcodes/simulation/data/NA_robustness_results-12-06-23_21-03.Rds
Normal file
BIN
Rcodes/simulation/data/NA_robustness_results-12-06-23_21-03.Rds
Normal file
Binary file not shown.
BIN
Rcodes/simulation/data/NA_robustness_results-13-06-23_16-07.Rds
Normal file
BIN
Rcodes/simulation/data/NA_robustness_results-13-06-23_16-07.Rds
Normal file
Binary file not shown.
BIN
Rcodes/simulation/data/NA_robustness_results-14-06-23_15-03.Rds
Normal file
BIN
Rcodes/simulation/data/NA_robustness_results-14-06-23_15-03.Rds
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue