--- title: "Stats at Taxo" format: html --- ```{r} library(knitr) printer_tabset <- function(x, options, ...) { if (is.null(names(x))) names(x) <- seq_along(x) header <- ":::: {.panel-tabset}" footer <- "::::" res <- lapply(seq_along(x), function(i) { knitr::knit_child( text = c( "##### `r names(x)[i]`", "", "```{r}", "#| echo: false", "x[[i]]", "```" ), envir = environment(), quiet = TRUE ) }) out <- paste(c(header, res, footer), collapse = "\n\n") knitr::asis_output(out) } registerS3method("knit_print", "list", printer_tabset) ``` ```{r} source("utils.R") library(biomformat) library(phyloseq) library(tidytree) library(treeio) library(ggplot2) library(ggtree) library(tidyverse) library(R.utils) library(sbm) library(blockmodels) the_data <- import_biom("data/chaillou/chaillou.biom") the_tree <- read.newick("data/chaillou/tree.nwk") ggtree(the_tree, aes(x, y)) + geom_tree() + theme_tree() taxa_names <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species/OTU") taxo <- tax_table(the_data) per_taxa_network <- collapse_otu_at_taxo(the_data) names(per_taxa_network) <- taxa_names ``` ```{r} #| freeze: true plot_mat <- function(matrix, rank) { dat2 <- matrix %>% as_tibble(rownames = "Var1") %>% pivot_longer(-Var1, names_to = "Var2", values_to = "value") %>% mutate(Var1 = stringr::str_split(Var1, "-", simplify = TRUE)[, rank]) p <- ggplot(dat2, aes(Var1, Var2)) + geom_tile(aes(fill = value)) + # geom_text(aes(label = round(value, 1)), data = dat2 %>% filter(value > 0)) + scale_fill_gradientn( colours = c("white", "#FEECE3", "red"), values = c(0, .Machine$double.eps, 1) ) + # To set 0 in pure white and smallest non zero as off-white theme_bw() + labs(x = "Taxa", y = "Samples", title = paste0(length(unique(dat2$Var1)), " different taxa")) + theme( axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1), axis.text.y = element_blank() ) return(p) } otu_table_plot <- lapply(seq_along(per_taxa_network), function(idx) plot_mat(per_taxa_network[[idx]], rank = idx)) names(otu_table_plot) <- taxa_names ``` # Count matrix plot ```{r} otu_table_plot ``` ```{r} # Below the graph of taxa abundance # otus <- per_taxa_no_names <- collapse_otu_at_taxo(phylo_data = the_data, renameOTUs = FALSE) otu_mat <- as(otu_table(the_data), "matrix") if (!taxa_are_rows(the_data)) { otu_mat <- t(otu_mat) } taxa_abund <- rowSums(otu_mat) abund_df <- tibble( label = names(otu_abund), # MUST match tree tip labels abundance = as.numeric(otu_abund) ) stopifnot(max(abund_df$abundance) > 0) tree_tbl <- as_tibble(the_tree) %>% left_join(abund_df, by = "label") %>% mutate(abundance = replace_na(abundance, 0)) ## =============================== ## Step 4 — Aggregate abundance up to the root ## =============================== for (i in seq_len(nrow(tree_tbl))) { parent <- tree_tbl$parent[i] if (!is.na(parent)) { tree_tbl$abundance[tree_tbl$node == parent] <- tree_tbl$abundance[tree_tbl$node == parent] + tree_tbl$abundance[i] } } ## =============================== ## Step 5 — Log transform (recommended) ## =============================== tree_tbl <- tree_tbl %>% mutate(log_abundance = log10(abundance + 1)) ## =============================== ## Step 6 — Plot ggtree with edge coloring ## =============================== p <- ggtree(the_tree) %<+% tree_tbl + geom_tree( aes(color = edge_log_abundance), size = 1 ) + scale_color_gradient( low = "#FEECE3", high = "red", name = "Log10 abundance" ) + theme_tree() p ggtree(the_tree) %<+% tree_tbl + geom_tree(color = "grey70", size = 0.6) + geom_point( aes(color = log_abundance), size = 2 ) + scale_color_gradient( low = "#FEECE3", high = "red", name = "Log10\nabundance" ) + theme_tree() ``` # Standard deviation ```{r} kables_sd <- lapply(per_taxa_network, function(matrix) { sd_vec <- as.matrix(apply(matrix, 1, sd)) sd_vec <- sd_vec[order(rownames(sd_vec)), ] tax_df_names <- do.call("rbind", rownames(matrix) %>% str_split("-")) %>% as.data.frame() sd_df <- cbind(tax_df_names, sd_vec) kable(sd_df, row.names = FALSE) }) kables_sd ```