190 lines
No EOL
4.8 KiB
Text
190 lines
No EOL
4.8 KiB
Text
---
|
|
title: "Stats at Taxo"
|
|
format: html
|
|
---
|
|
|
|
```{r}
|
|
library(knitr)
|
|
printer_tabset <- function(x, options, ...) {
|
|
if (is.null(names(x))) names(x) <- seq_along(x)
|
|
|
|
header <- ":::: {.panel-tabset}"
|
|
footer <- "::::"
|
|
|
|
res <- lapply(seq_along(x), function(i) {
|
|
knitr::knit_child(
|
|
text = c(
|
|
"##### `r names(x)[i]`",
|
|
"",
|
|
"```{r}",
|
|
"#| echo: false",
|
|
"x[[i]]",
|
|
"```"
|
|
),
|
|
envir = environment(),
|
|
quiet = TRUE
|
|
)
|
|
})
|
|
|
|
out <- paste(c(header, res, footer), collapse = "\n\n")
|
|
knitr::asis_output(out)
|
|
}
|
|
registerS3method("knit_print", "list", printer_tabset)
|
|
```
|
|
|
|
```{r}
|
|
source("utils.R")
|
|
library(biomformat)
|
|
library(phyloseq)
|
|
library(tidytree)
|
|
library(treeio)
|
|
library(ggplot2)
|
|
library(ggtree)
|
|
library(tidyverse)
|
|
library(R.utils)
|
|
library(sbm)
|
|
library(blockmodels)
|
|
|
|
the_data <- import_biom("data/chaillou/chaillou.biom")
|
|
the_tree <- read.newick("data/chaillou/tree.nwk")
|
|
|
|
ggtree(the_tree, aes(x, y)) + geom_tree() + theme_tree()
|
|
|
|
taxa_names <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species/OTU")
|
|
|
|
taxo <- tax_table(the_data)
|
|
per_taxa_network <- collapse_otu_at_taxo(the_data)
|
|
names(per_taxa_network) <- taxa_names
|
|
```
|
|
|
|
```{r}
|
|
#| freeze: true
|
|
plot_mat <- function(matrix, rank) {
|
|
dat2 <-
|
|
matrix %>%
|
|
as_tibble(rownames = "Var1") %>%
|
|
pivot_longer(-Var1, names_to = "Var2", values_to = "value") %>%
|
|
mutate(Var1 = stringr::str_split(Var1, ";_;", simplify = TRUE)[, rank])
|
|
p <- ggplot(dat2, aes(Var1, Var2)) +
|
|
geom_tile(aes(fill = value)) +
|
|
# geom_text(aes(label = round(value, 1)), data = dat2 %>% filter(value > 0)) +
|
|
scale_fill_gradientn(
|
|
colours = c("white", "#FEECE3", "red"),
|
|
values = c(0, .Machine$double.eps, 1)
|
|
) + # To set 0 in pure white and smallest non zero as off-white
|
|
theme_bw() +
|
|
labs(x = "Taxa", y = "Samples", title = paste0(length(unique(dat2$Var1)), " different taxa")) +
|
|
theme(
|
|
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
|
|
axis.text.y = element_blank()
|
|
)
|
|
|
|
return(p)
|
|
}
|
|
otu_table_plot <- lapply(seq_along(per_taxa_network), function(idx) plot_mat(per_taxa_network[[idx]], rank = idx))
|
|
names(otu_table_plot) <- taxa_names
|
|
```
|
|
|
|
# Count matrix plot
|
|
```{r}
|
|
otu_table_plot
|
|
```
|
|
|
|
```{r}
|
|
#| eval: false
|
|
# Below the graph of taxa abundance
|
|
# otus <- per_taxa_no_names <- collapse_otu_at_taxo(phylo_data = the_data, renameOTUs = FALSE)
|
|
otu_mat <- as(otu_table(the_data), "matrix")
|
|
|
|
if (!taxa_are_rows(the_data)) {
|
|
otu_mat <- t(otu_mat)
|
|
}
|
|
|
|
taxa_abund <- rowSums(otu_mat)
|
|
|
|
abund_df <- tibble(
|
|
label = names(otu_abund), # MUST match tree tip labels
|
|
abundance = as.numeric(otu_abund)
|
|
)
|
|
stopifnot(max(abund_df$abundance) > 0)
|
|
tree_tbl <- as_tibble(the_tree) %>%
|
|
left_join(abund_df, by = "label") %>%
|
|
mutate(abundance = replace_na(abundance, 0))
|
|
|
|
## ===============================
|
|
## Step 4 — Aggregate abundance up to the root
|
|
## ===============================
|
|
for (i in seq_len(nrow(tree_tbl))) {
|
|
parent <- tree_tbl$parent[i]
|
|
if (!is.na(parent)) {
|
|
tree_tbl$abundance[tree_tbl$node == parent] <-
|
|
tree_tbl$abundance[tree_tbl$node == parent] +
|
|
tree_tbl$abundance[i]
|
|
}
|
|
}
|
|
|
|
|
|
## ===============================
|
|
## Step 5 — Log transform (recommended)
|
|
## ===============================
|
|
tree_tbl <- tree_tbl %>%
|
|
mutate(log_abundance = log10(abundance + 1))
|
|
## ===============================
|
|
## Step 6 — Plot ggtree with edge coloring
|
|
## ===============================
|
|
p <- ggtree(the_tree) %<+% tree_tbl +
|
|
geom_tree(
|
|
aes(color = edge_log_abundance),
|
|
size = 1
|
|
) +
|
|
scale_color_gradient(
|
|
low = "#FEECE3",
|
|
high = "red",
|
|
name = "Log10 abundance"
|
|
) +
|
|
theme_tree()
|
|
|
|
p
|
|
ggtree(the_tree) %<+% tree_tbl +
|
|
geom_tree(color = "grey70", size = 0.6) +
|
|
geom_point(
|
|
aes(color = log_abundance),
|
|
size = 2
|
|
) +
|
|
scale_color_gradient(
|
|
low = "#FEECE3",
|
|
high = "red",
|
|
name = "Log10\nabundance"
|
|
) +
|
|
theme_tree()
|
|
|
|
```
|
|
|
|
# Standard deviation
|
|
|
|
```{r}
|
|
kables_sd <- lapply(per_taxa_network, function(matrix) {
|
|
sd_vec <- as.matrix(apply(matrix, 1, sd))
|
|
sd_vec <- sd_vec[order(rownames(sd_vec)), ]
|
|
tax_df_names <- do.call("rbind", rownames(matrix) %>% str_split("-")) %>% as.data.frame()
|
|
sd_df <- cbind(tax_df_names, sd_vec)
|
|
kable(sd_df, row.names = FALSE)
|
|
})
|
|
kables_sd
|
|
```
|
|
|
|
```{r}
|
|
boxplots <- lapply(per_taxa_network, function(matrix) {
|
|
df_mat <- stack(as.data.frame(t(matrix)))
|
|
ggplot(df_mat, aes(y = values, x = ind, fill = ind)) +
|
|
geom_boxplot()
|
|
})
|
|
boxplots[-7]
|
|
```
|
|
|
|
```{r}
|
|
seq_effort <- lapply(per_taxa_network, function(matrix) {
|
|
colSums(matrix)
|
|
})
|
|
seq_effort
|
|
``` |