# Benchmark figure: run time of the "para", "seq" and "notrans" runs stored
# under results/lbm-seq, shown per rank and overlaid with the number of OTUs
# available at that rank.
#
# Inputs (paths as used below):
#   - data/mach/kinetic.biom     read with import_biom()
#   - results/lbm-seq/*.Rds      each object must expose a `$benchmark` table
#                                with at least the columns `expr` and `time`
#   - utils.R                    sourced helpers (collapse_otu_at_taxo() is
#                                presumably defined there)
# Output: the ggplot built at the end of the script.

library(here)
library(stringr)
library(tidyverse)
library(phyloseq)
library(biomformat)
library(ggplot2)

source("utils.R")

the_data <- import_biom("data/mach/kinetic.biom")
per_taxa_networks <- collapse_otu_at_taxo(the_data)

# One row count per element of per_taxa_networks, keyed by the element name.
# vapply() fails loudly if nrow() does not return a single integer, instead of
# silently changing the result type the way sapply() would.
otu_counts <- vapply(per_taxa_networks, nrow, integer(1))
otu_df <- data.frame(
  Rank = names(otu_counts),
  Nb_OTU = unname(otu_counts),
  stringsAsFactors = FALSE
)

# Patterns are regular expressions: escape the dot and anchor at the end so
# that only files really ending in ".Rds" are picked up.
flist <- list.files(
  here("results", "lbm-seq"),
  full.names = TRUE,
  pattern = "\\.Rds$"
)
stopifnot("No .Rds result file found in results/lbm-seq" = length(flist) > 0)

# Per-type subsets of the result files. They are not used further down in this
# section; grep(value = TRUE) is used rather than grepv() so the script also
# runs on R versions older than 4.5.0.
para_flist <- grep("para\\.Rds$", flist, value = TRUE)
seq_flist <- grep("seq\\.Rds$", flist, value = TRUE)
notrans_flist <- grep("notrans\\.Rds$", flist, value = TRUE)

# Stack the benchmark tables of all result files.
bench_df <- do.call(
  "rbind",
  lapply(flist, function(file) readRDS(file)$benchmark)
)

# `expr` is expected to look like "Rank<digit><type>"; split it into its two
# parts, attach the OTU count of the matching rank (character key on both
# sides), then turn both parts into factors for plotting.
bench_df <- bench_df %>%
  mutate(expr = as.character(expr)) %>%
  separate_wider_regex(
    cols = "expr",
    patterns = c(Rank = "Rank[0-9]", type = "para|seq|notrans")
  ) %>%
  left_join(otu_df, by = "Rank") %>%
  mutate(Rank = as.factor(Rank), type = as.factor(type))

# Labels are assigned by position to the sorted "Rank<digit>" levels, so the
# number of levels must match exactly; `levels<-` would otherwise silently
# attach the wrong names when a rank is missing.
# NOTE(review): assumes the five benchmarked ranks are Phylum..Genus in
# increasing digit order - confirm against the benchmark script.
rank_labels <- c("Phylum", "Class", "Order", "Family", "Genus")
stopifnot(
  "Number of benchmarked ranks differs from the number of rank labels" =
    nlevels(bench_df$Rank) == length(rank_labels)
)
levels(bench_df$Rank) <- rank_labels

# Scaling factor that puts the OTU counts on the same axis as `time`
# (expressed in the unit of `time`, per OTU).
coeff <- 220000000000

# NOTE(review): `time` is assumed to be in nanoseconds (the unit used by
# microbenchmark, and the only one for which coeff = 2.2e11 gives plausible
# run times). Both layers are converted so that the axis labelled
# "Time (seconds)" really is in seconds - confirm against the script that
# wrote the .Rds files.
ns_per_sec <- 1e9

# Colours and legend labels are keyed by factor level rather than by position,
# so a missing algorithm type cannot shift the legend.
type_colours <- c(notrans = "#363634", para = "#009E73", seq = "#CC79A7")
type_labels <- c(
  notrans = "No transfer",
  para = "Parallelized",
  seq = "Sequential"
)

ggplot(bench_df, aes(x = Rank, col = type)) +
  geom_boxplot(aes(y = time / ns_per_sec)) +
  geom_point(
    aes(y = coeff * Nb_OTU / ns_per_sec, size = Nb_OTU),
    shape = 13
  ) +
  scale_color_manual(values = type_colours, labels = type_labels) +
  scale_y_continuous(
    # Inverse of the transformation applied to Nb_OTU in geom_point().
    sec.axis = sec_axis(~ . * ns_per_sec / coeff, name = "Number of OTUs")
  ) +
  labs(
    size = "Number of OTUs",
    color = "Algorithm Type",
    y = "Time (seconds)"
  ) +
  theme_minimal()