Added tau propagation

This commit is contained in:
Louis 2026-01-16 15:41:16 +01:00
parent 542d9cd271
commit f5e6e8f05c

63
utils.R
View file

@ -1,6 +1,8 @@
library(knitr)
library(kableExtra)
library(dplyr)
library(tidyr)
library(rlang)
library(stringr)
ellipse_table <- function(df, nr = 5, nc = 5, format = "latex") {
@ -59,7 +61,7 @@ ellipse_table <- function(df, nr = 5, nc = 5, format = "latex") {
return(sub_df)
}
collapse_otu_at_taxo <- function(phylo_data, NArm = TRUE) {
collapse_otu_at_taxo <- function(phylo_data, NArm = TRUE, renameOTUs = TRUE) {
tax_ranks <- colnames(tax_table(phylo_data))
tax_df <- tax_table(phylo_data)
otu_tables_by_rank <- lapply(tax_ranks, function(rank) {
@ -71,16 +73,69 @@ collapse_otu_at_taxo <- function(phylo_data, NArm = TRUE) {
otu <- t(otu)
}
otu_matrix <- as.matrix(otu)
if (renameOTUs) {
id_in_taxo <- match(rownames(otu_matrix), rownames(tax_df))
taxo_names <- tax_df[id_in_taxo, seq(which(tax_ranks == rank))] %>%
as.data.frame() %>%
mutate_if(.predicate = is_character, .funs = partial(str_replace_all, pattern = "-", replacement = ".")) %>%
unite(col = "tax_full", sep = "-") %>%
mutate_if(.predicate = rlang::is_character, .funs = purrr::partial(str_replace_all, pattern = "-", replacement = ".")) %>%
tidyr::unite(col = "tax_full", sep = ";_;") %>%
unique()
rownames(otu_matrix) <- as.vector(taxo_names[, 1])
}
return(otu_matrix)
})
names(otu_tables_by_rank) <- tax_ranks
return(otu_tables_by_rank)
}
propagate_taus <- function(
tau_matrix,
physeq,
taxrank = phyloseq::rank_names(physeq)[1],
NArm = TRUE,
bad_empty = c(NA, "", " ", "\t")
) {
if (is.null(phyloseq::access(physeq, "tax_table"))) {
stop("The tax_glom() function requires that physeq contain a taxonomyTable")
}
if (!taxrank[1] %in% phyloseq::rank_names(physeq)) {
stop("Bad taxrank argument. Must be among the values of rank_names(physeq)")
}
CN <- which(phyloseq::rank_names(physeq) %in% taxrank[1])
tax <- as(phyloseq::access(physeq, "tax_table"), "matrix")[, CN]
if (NArm) {
keep_species <- names(tax)[!(tax %in% bad_empty)]
physeq <- phyloseq::prune_taxa(keep_species, physeq)
}
tax <- as(phyloseq::access(physeq, "tax_table"), "matrix")[, 1:CN,
drop = FALSE
]
tax <- apply(tax, 1, function(i) {
paste(i, sep = ";_;", collapse = ";_;")
})
tax_names <- unique(unname(tax))
spCliques <- tapply(names(tax), factor(tax), list)
old_tax_names <- rownames(tau_matrix)
nb_desc <- sapply(old_tax_names, function(name) length(grep(name, tax_names)))
cumsum_nb_desc <- cumsum(nb_desc)
nb_desc_tot <- sum(nb_desc)
new_tau <- matrix(0, nrow = nb_desc_tot, ncol = ncol(tau_matrix))
old_and_new <- lapply(old_tax_names, function(name) tax_names[grep(name, tax_names)])
names(old_and_new) <- old_tax_names
for (idx in seq_along(old_and_new)) {
old_taxa <- names(old_and_new)[[idx]]
if (idx == 1) {
old_idx <- 1
} else {
old_idx <- idx - 1
}
new_tau[cumsum_nb_desc[old_idx]:cumsum_nb_desc[idx], ] <- tau_matrix[old_taxa, ]
}
rownames(new_tau) <- tax_names
return(new_tau)
}