human-microbiome-compendium/stats_panel.qmd

190 lines
No EOL
4.8 KiB
Text

---
title: "Stats at Taxo"
format: html
---
```{r}
library(knitr)
printer_tabset <- function(x, options, ...) {
if (is.null(names(x))) names(x) <- seq_along(x)
header <- ":::: {.panel-tabset}"
footer <- "::::"
res <- lapply(seq_along(x), function(i) {
knitr::knit_child(
text = c(
"##### `r names(x)[i]`",
"",
"```{r}",
"#| echo: false",
"x[[i]]",
"```"
),
envir = environment(),
quiet = TRUE
)
})
out <- paste(c(header, res, footer), collapse = "\n\n")
knitr::asis_output(out)
}
registerS3method("knit_print", "list", printer_tabset)
```
```{r}
source("utils.R")
library(biomformat)
library(phyloseq)
library(tidytree)
library(treeio)
library(ggplot2)
library(ggtree)
library(tidyverse)
library(R.utils)
library(sbm)
library(blockmodels)
the_data <- import_biom("data/chaillou/chaillou.biom")
the_tree <- read.newick("data/chaillou/tree.nwk")
ggtree(the_tree, aes(x, y)) + geom_tree() + theme_tree()
taxa_names <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species/OTU")
taxo <- tax_table(the_data)
per_taxa_network <- collapse_otu_at_taxo(the_data)
names(per_taxa_network) <- taxa_names
```
```{r}
#| freeze: true
plot_mat <- function(matrix, rank) {
dat2 <-
matrix %>%
as_tibble(rownames = "Var1") %>%
pivot_longer(-Var1, names_to = "Var2", values_to = "value") %>%
mutate(Var1 = stringr::str_split(Var1, ";_;", simplify = TRUE)[, rank])
p <- ggplot(dat2, aes(Var1, Var2)) +
geom_tile(aes(fill = value)) +
# geom_text(aes(label = round(value, 1)), data = dat2 %>% filter(value > 0)) +
scale_fill_gradientn(
colours = c("white", "#FEECE3", "red"),
values = c(0, .Machine$double.eps, 1)
) + # To set 0 in pure white and smallest non zero as off-white
theme_bw() +
labs(x = "Taxa", y = "Samples", title = paste0(length(unique(dat2$Var1)), " different taxa")) +
theme(
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_blank()
)
return(p)
}
otu_table_plot <- lapply(seq_along(per_taxa_network), function(idx) plot_mat(per_taxa_network[[idx]], rank = idx))
names(otu_table_plot) <- taxa_names
```
# Count matrix plot
```{r}
otu_table_plot
```
```{r}
#| eval: false
# Below the graph of taxa abundance
# otus <- per_taxa_no_names <- collapse_otu_at_taxo(phylo_data = the_data, renameOTUs = FALSE)
otu_mat <- as(otu_table(the_data), "matrix")
if (!taxa_are_rows(the_data)) {
otu_mat <- t(otu_mat)
}
taxa_abund <- rowSums(otu_mat)
abund_df <- tibble(
label = names(otu_abund), # MUST match tree tip labels
abundance = as.numeric(otu_abund)
)
stopifnot(max(abund_df$abundance) > 0)
tree_tbl <- as_tibble(the_tree) %>%
left_join(abund_df, by = "label") %>%
mutate(abundance = replace_na(abundance, 0))
## ===============================
## Step 4 — Aggregate abundance up to the root
## ===============================
for (i in seq_len(nrow(tree_tbl))) {
parent <- tree_tbl$parent[i]
if (!is.na(parent)) {
tree_tbl$abundance[tree_tbl$node == parent] <-
tree_tbl$abundance[tree_tbl$node == parent] +
tree_tbl$abundance[i]
}
}
## ===============================
## Step 5 — Log transform (recommended)
## ===============================
tree_tbl <- tree_tbl %>%
mutate(log_abundance = log10(abundance + 1))
## ===============================
## Step 6 — Plot ggtree with edge coloring
## ===============================
p <- ggtree(the_tree) %<+% tree_tbl +
geom_tree(
aes(color = edge_log_abundance),
size = 1
) +
scale_color_gradient(
low = "#FEECE3",
high = "red",
name = "Log10 abundance"
) +
theme_tree()
p
ggtree(the_tree) %<+% tree_tbl +
geom_tree(color = "grey70", size = 0.6) +
geom_point(
aes(color = log_abundance),
size = 2
) +
scale_color_gradient(
low = "#FEECE3",
high = "red",
name = "Log10\nabundance"
) +
theme_tree()
```
# Standard deviation
```{r}
kables_sd <- lapply(per_taxa_network, function(matrix) {
sd_vec <- as.matrix(apply(matrix, 1, sd))
sd_vec <- sd_vec[order(rownames(sd_vec)), ]
tax_df_names <- do.call("rbind", rownames(matrix) %>% str_split("-")) %>% as.data.frame()
sd_df <- cbind(tax_df_names, sd_vec)
kable(sd_df, row.names = FALSE)
})
kables_sd
```
```{r}
boxplots <- lapply(per_taxa_network, function(matrix) {
df_mat <- stack(as.data.frame(t(matrix)))
ggplot(df_mat, aes(y = values, x = ind, fill = ind)) +
geom_boxplot()
})
boxplots[-7]
```
```{r}
seq_effort <- lapply(per_taxa_network, function(matrix) {
colSums(matrix)
})
seq_effort
```