Graph for time and size of OTUs

This commit is contained in:
Louis 2026-01-29 10:37:32 +01:00
parent 2fc1cf4e65
commit 9f589a1fdc

View file

@ -0,0 +1,44 @@
library(here)
library(stringr)
library(tidyverse)
library(phyloseq)
library(biomformat)
source("utils.R")
# Load the kinetic BIOM table and count the OTUs found at each taxonomic rank.
the_data <- import_biom("data/mach/kinetic.biom")
# `collapse_otu_at_taxo()` is defined in utils.R; presumably it returns one
# table per taxonomic rank, named by rank -- TODO confirm against utils.R.
per_taxa_networks <- collapse_otu_at_taxo(the_data)
# `vapply()` guarantees one integer row count per element; `sapply()` would
# silently hand back a list if any element had no row count.
otu_counts <- vapply(per_taxa_networks, nrow, integer(1))
# One row per rank: `Rank` holds the element name, `Nb_OTU` its row count.
otu_df <- data.frame(
  Rank = names(otu_counts),
  Nb_OTU = unname(otu_counts),
  stringsAsFactors = FALSE
)
# Collect every saved benchmark result under results/lbm-seq. The dot is
# escaped and the pattern anchored so only names ending in ".Rds" match.
flist <- list.files(
  here("results", "lbm-seq"),
  pattern = "\\.Rds$",
  full.names = TRUE
)
# Per-algorithm subsets of the result files (not used in the visible part of
# this script). `grep(value = TRUE)` is the portable spelling of `grepv()`,
# which only exists in R >= 4.5.
para_flist <- grep("para\\.Rds$", flist, value = TRUE)
seq_flist <- grep("seq\\.Rds$", flist, value = TRUE)
notrans_flist <- grep("notrans\\.Rds$", flist, value = TRUE)
# Without any file, `do.call("rbind", list())` returns NULL and the script
# would only fail later with an unrelated message; stop here instead.
if (length(flist) == 0) {
  stop(
    "No .Rds benchmark files found in ", here("results", "lbm-seq"),
    call. = FALSE
  )
}
# Stack the `benchmark` element of every saved result into one data frame.
bench_df <- do.call(
  "rbind",
  lapply(flist, function(file) readRDS(file)$benchmark)
)
# Split each benchmark label, which must have the form `Rank<digit><type>`
# with type one of para/seq/notrans, into a `Rank` and a `type` column, then
# attach the OTU count of the matching rank.
bench_df <- bench_df %>%
  mutate(expr = as.character(expr)) %>%
  separate_wider_regex(
    cols = "expr",
    patterns = c(Rank = "Rank[0-9]", type = "para|seq|notrans")
  ) %>%
  # Join on the character key: `otu_df$Rank` is character, so converting
  # `Rank` to a factor before the join would only be coerced back.
  left_join(otu_df, by = "Rank") %>%
  mutate(Rank = as.factor(Rank), type = as.factor(type))
# The relabelling is positional over the alphabetically sorted levels.
# NOTE(review): this assumes exactly five ranks are present and that they run
# from Phylum to Genus in that order -- confirm against the benchmark files.
levels(bench_df$Rank) <- c("Phylum", "Class", "Order", "Family", "Genus")
library(ggplot2)
# Boxplots of run time per taxonomic rank and algorithm type, overlaid with
# the number of OTUs of each rank read on a secondary axis.
#
# NOTE(review): `time` is assumed to be in nanoseconds, as produced by
# microbenchmark (the `expr`/`time` columns and the former 2.2e11 scale factor
# point that way) -- confirm. It is converted here so that the axis really
# shows the seconds its label announces.
ns_per_sec <- 1e9
# Seconds of primary axis per OTU on the secondary axis (2.2e11 ns = 220 s).
coeff <- 220
# Colours and legend labels are keyed by factor level, so they stay attached
# to the right algorithm even if one type is absent from the results.
type_colours <- c(notrans = "#363634", para = "#009E73", seq = "#CC79A7")
type_labels <- c(
  notrans = "No transfer",
  para = "Parallelized",
  seq = "Sequential"
)
ggplot(bench_df, aes(x = Rank, col = type)) +
  geom_boxplot(aes(y = time / ns_per_sec)) +
  # One marker is drawn per benchmark row; rows of a rank share one position.
  geom_point(aes(y = coeff * Nb_OTU, size = Nb_OTU), shape = 13) +
  scale_color_manual(values = type_colours, labels = type_labels) +
  scale_y_continuous(
    sec.axis = sec_axis(~ . / coeff, name = "Number of OTUs")
  ) +
  labs(
    size = "Number of OTUs",
    color = "Algorithm Type",
    y = "Time (seconds)"
  ) +
  theme_minimal()