diff --git a/utils.R b/utils.R index 3bc52e8..83e24ae 100644 --- a/utils.R +++ b/utils.R @@ -1,6 +1,8 @@ library(knitr) library(kableExtra) library(dplyr) +library(tidyr) +library(rlang) library(stringr) ellipse_table <- function(df, nr = 5, nc = 5, format = "latex") { @@ -59,7 +61,7 @@ ellipse_table <- function(df, nr = 5, nc = 5, format = "latex") { return(sub_df) } -collapse_otu_at_taxo <- function(phylo_data, NArm = TRUE) { +collapse_otu_at_taxo <- function(phylo_data, NArm = TRUE, renameOTUs = TRUE) { tax_ranks <- colnames(tax_table(phylo_data)) tax_df <- tax_table(phylo_data) otu_tables_by_rank <- lapply(tax_ranks, function(rank) { @@ -71,16 +73,69 @@ collapse_otu_at_taxo <- function(phylo_data, NArm = TRUE) { otu <- t(otu) } otu_matrix <- as.matrix(otu) - id_in_taxo <- match(rownames(otu_matrix), rownames(tax_df)) - taxo_names <- tax_df[id_in_taxo, seq(which(tax_ranks == rank))] %>% - as.data.frame() %>% - mutate_if(.predicate = is_character, .funs = partial(str_replace_all, pattern = "-", replacement = ".")) %>% - unite(col = "tax_full", sep = "-") %>% - unique() - rownames(otu_matrix) <- as.vector(taxo_names[, 1]) - + if (renameOTUs) { + id_in_taxo <- match(rownames(otu_matrix), rownames(tax_df)) + taxo_names <- tax_df[id_in_taxo, seq(which(tax_ranks == rank))] %>% + as.data.frame() %>% + mutate_if(.predicate = rlang::is_character, .funs = purrr::partial(str_replace_all, pattern = "-", replacement = ".")) %>% + tidyr::unite(col = "tax_full", sep = ";_;") %>% + unique() + rownames(otu_matrix) <- as.vector(taxo_names[, 1]) + } return(otu_matrix) }) names(otu_tables_by_rank) <- tax_ranks return(otu_tables_by_rank) } + + +propagate_taus <- function( + tau_matrix, + physeq, + taxrank = phyloseq::rank_names(physeq)[1], + NArm = TRUE, + bad_empty = c(NA, "", " ", "\t") +) { + if (is.null(phyloseq::access(physeq, "tax_table"))) { + stop("The tax_glom() function requires that physeq contain a taxonomyTable") + } + if (!taxrank[1] %in% phyloseq::rank_names(physeq)) { + stop("Bad taxrank argument. Must be among the values of rank_names(physeq)") + } + CN <- which(phyloseq::rank_names(physeq) %in% taxrank[1]) + tax <- as(phyloseq::access(physeq, "tax_table"), "matrix")[, CN] + if (NArm) { + keep_species <- names(tax)[!(tax %in% bad_empty)] + physeq <- phyloseq::prune_taxa(keep_species, physeq) + } + tax <- as(phyloseq::access(physeq, "tax_table"), "matrix")[, 1:CN, + drop = FALSE + ] + tax <- apply(tax, 1, function(i) { + paste(i, sep = ";_;", collapse = ";_;") + }) + tax_names <- unique(unname(tax)) + spCliques <- tapply(names(tax), factor(tax), list) + + old_tax_names <- rownames(tau_matrix) + + nb_desc <- sapply(old_tax_names, function(name) length(grep(name, tax_names))) + cumsum_nb_desc <- cumsum(nb_desc) + + nb_desc_tot <- sum(nb_desc) + new_tau <- matrix(0, nrow = nb_desc_tot, ncol = ncol(tau_matrix)) + + old_and_new <- lapply(old_tax_names, function(name) tax_names[grep(name, tax_names)]) + names(old_and_new) <- old_tax_names + for (idx in seq_along(old_and_new)) { + old_taxa <- names(old_and_new)[[idx]] + if (idx == 1) { + old_idx <- 1 + } else { + old_idx <- idx - 1 + } + new_tau[cumsum_nb_desc[old_idx]:cumsum_nb_desc[idx], ] <- tau_matrix[old_taxa, ] + } + rownames(new_tau) <- tax_names + return(new_tau) +}