diff --git a/stats_panel.qmd b/stats_panel.qmd index 8d8d3c2..5f2b140 100644 --- a/stats_panel.qmd +++ b/stats_panel.qmd @@ -36,13 +36,19 @@ registerS3method("knit_print", "list", printer_tabset) source("utils.R") library(biomformat) library(phyloseq) +library(tidytree) +library(treeio) library(ggplot2) +library(ggtree) library(tidyverse) library(R.utils) library(sbm) library(blockmodels) the_data <- import_biom("data/chaillou/chaillou.biom") +the_tree <- read.newick("data/chaillou/tree.nwk") + +ggtree(the_tree, aes(x, y)) + geom_tree() + theme_tree() taxa_names <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species/OTU") @@ -84,6 +90,75 @@ names(otu_table_plot) <- taxa_names otu_table_plot ``` +```{r} +# Below the graph of taxa abundance +# otus <- per_taxa_no_names <- collapse_otu_at_taxo(phylo_data = the_data, renameOTUs = FALSE) +otu_mat <- as(otu_table(the_data), "matrix") + +if (!taxa_are_rows(the_data)) { + otu_mat <- t(otu_mat) +} + +taxa_abund <- rowSums(otu_mat) + +abund_df <- tibble( + label = names(otu_abund), # MUST match tree tip labels + abundance = as.numeric(otu_abund) +) +stopifnot(max(abund_df$abundance) > 0) +tree_tbl <- as_tibble(the_tree) %>% + left_join(abund_df, by = "label") %>% + mutate(abundance = replace_na(abundance, 0)) + +## =============================== +## Step 4 — Aggregate abundance up to the root +## =============================== +for (i in seq_len(nrow(tree_tbl))) { + parent <- tree_tbl$parent[i] + if (!is.na(parent)) { + tree_tbl$abundance[tree_tbl$node == parent] <- + tree_tbl$abundance[tree_tbl$node == parent] + + tree_tbl$abundance[i] + } +} + + +## =============================== +## Step 5 — Log transform (recommended) +## =============================== +tree_tbl <- tree_tbl %>% + mutate(log_abundance = log10(abundance + 1)) +## =============================== +## Step 6 — Plot ggtree with edge coloring +## =============================== +p <- ggtree(the_tree) %<+% tree_tbl + + geom_tree( + aes(color = edge_log_abundance), + size = 1 + ) + + scale_color_gradient( + low = "#FEECE3", + high = "red", + name = "Log10 abundance" + ) + + theme_tree() + +p +ggtree(the_tree) %<+% tree_tbl + + geom_tree(color = "grey70", size = 0.6) + + geom_point( + aes(color = log_abundance), + size = 2 + ) + + scale_color_gradient( + low = "#FEECE3", + high = "red", + name = "Log10\nabundance" + ) + + theme_tree() + +``` + # Standard deviation ```{r}