diff --git a/R/anovaComparison.R b/R/anovaComparison.R new file mode 100644 index 0000000..14fc05c --- /dev/null +++ b/R/anovaComparison.R @@ -0,0 +1,139 @@ +# Phylocomparison tools +library(phylolm) +library(phylotools) +library(phytools) +library(phylolimma) +library(ape) + +# Plotting +library(ggplot2) + +# Sourcing the utils +source("./R/utils.R") + +# Fixing randomness for reproducibility +set.seed(1234) + +# Parameters +nb_species <- 100 + +# Generating the phylo tree +tree <- rphylo(nb_species, birth = 0.1, death = 0) + +# Group selections tries to have group of same size +plotTree(tree, node.numbers = TRUE) + +# Here I chose two ancestors to split in two the tree +ancestors <- c(102, 104) +K <- length(ancestors) # The number of groups + +# I assign the groups numbers +## Matching the phylogeny +phylo_matching_groups <- sapply(1:nb_species, function(tip) { + get_phylo_group(tip, + tree, + ancestors = ancestors + ) +}) + +## Randomly +random_groups <- sample(1:K, nb_species, replace = TRUE) + +## Randomly but with same size of groups +# sameSize_random_groups <- sample(1:K, +# nb_species, +# replace = TRUE, +# prob = table(phylo_matching_groups) +# ) +# group_sizes <- table(phylo_matching_groups) + + +# Saving images of tree +plot_group_on_tree <- function(tree, groups) { + plot(tree, show.tip.label = FALSE, x.lim = 50) + tiplabels(bg = groups, pch = 21) + text(x = 10, y = 0, label = "This tree will be normalised.") +} + +# Saving trees +png(file = "img/group_phylo_matching_tree.png") +plot_group_on_tree(tree, group = phylo_matching_groups) +dev.off() + +png(file = "img/group_random_tree.png") +plot_group_on_tree(tree, group = random_groups) +dev.off() + +# Normalising tree edge length +taille_tree <- diag(vcv(tree))[1] +tree$edge.length <- tree$edge.length / taille_tree + +# Compute traits +# Parameters +base_values <- c(1, 2.1) # The base trait to add +sigma2_phylo <- 1 +sigma2_measure <- 0 +stoch_process <- "BM" +hypo_test <- "satterthwaite" #"vanilla" # "satterthwaite", "likelihood_ratio" + + +matching_phylo_traits <- compute_trait_values( + groups = phylo_matching_groups, + base_values = base_values, tree, + sigma2_phylo = sigma2_phylo, sigma2_measure = sigma2_measure, + stoch_process = stoch_process +) + +random_traits <- compute_trait_values( + groups = random_groups, + base_values = base_values, tree, + sigma2_phylo = sigma2_phylo, sigma2_measure = sigma2_measure, + stoch_process = stoch_process +) + +# For phylo matching +matching_anova <- lm(matching_phylo_traits ~ phylo_matching_groups) + +# TODO Handling the stoch process and model for phylolm +model <- stoch_process + +matching_phyloanova <- phylolm(matching_phylo_traits ~ phylo_matching_groups, + phy = tree, + model = model, + measurement_error = TRUE # To let phylolm know if there's measurement error +) + +matching_phyloanova$REML = FALSE + +if (hypo_test %in% c("vanilla", "satterthwaite")) { + phyloanova_F_stat <- compute_F_statistic( + r_squared = matching_phyloanova$r.squared, + df1 = K - 1, + df2 = nb_species - K + ) + + df1 <- K - 1 + df2 <- nb_species - K + + if (hypo_test == "satterthwaite") { + df2 <- phylolimma:::ddf_satterthwaite(matching_phyloanova, tree) + } + + p_value <- 1 - pf(phyloanova_F_stat, df1 = df1, df2 = df2) +} + +if (hypo_test == "likelihood_ratio") { + # How to obtain the loglikehood under H0 ? + # TODO Find the correct way to do this + # I assume that under H0 this is like saying everyone is from the same group + h0_matching_phyloanova <- phylolm(matching_phylo_traits ~ rep(1, nb_species), + phy = tree, + model = model, + measurement_error = (sigma2_measure != 0) # To let phylolm know if there's measurement error + ) + # But this gives a LAPACK error, the system is not inversible. + + lambda_ratio_stat <- -2(h0_matching_phyloanova$logLik - matching_phyloanova$logLik) +} + +p_value \ No newline at end of file diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..5621122 --- /dev/null +++ b/R/utils.R @@ -0,0 +1,68 @@ +#' Get the group number of a tip of the given tree +#' +#' @description +#' Returns the group number (based on the number of ancestors provided) +#' for the given tip +get_phylo_group <- function(tip, tree, ancestors = c(102, 104)) { + # Sanity checks + if (!all(ancestors %in% tree$edge[, 1])) { + stop("At least one ancestor is not in the given tree") + } + if (!paste0("t", tip) %in% tree$tip.label) { + stop("Provided tip is not in the tree.") + } + + for (ancestor_id in seq_along(ancestors)) { + if (tip %in% getDescendants(tree, ancestors[ancestor_id])) { + # If the tip is a descendant of this ancestor return its index + return(ancestor_id) + } + } + + # If we reach this line the tip is not part of the given ancestors + warning(paste0( + "The tip ", tip, + " is not a descendant of the provided ancestors" + )) +} + +#' Compute trait values for the given groups +#' +#' @description +#' For each value of group, the base value is matched in the order of +#' the vector (1st value for 1st group and etc). +#' Then the phylogenetic trait corresponding from the tree is computed. +#' And finally the error (gaussian centered) is computed. +#' The sum is returned +compute_trait_values <- function( + groups, + base_values, + tree, + sigma2_phylo, + sigma2_measure, + stoch_process = "BM") { + # Here we compute the base values for each of the species + trait <- rowSums(sapply(seq_along(base_values), function(i) base_values[i] * (groups == i))) + + # The phylogenetic induced value + trait_phylo <- rTrait(1, tree, + stoch_process, + parameters = list(sigma2 = sigma2_phylo) + ) + + # The measure error + trait_error <- rnorm(length(groups), mean = 0, sd = sqrt(sigma2_measure)) + + return(trait + trait_phylo + trait_error) +} + +#' Computes the F_statistic from the r_squared +#' +#' @description +#' Use the formula between r_squared and df1 (K-1), df2 (n - K) to return the +#' F statistic to test against the Fisher distribution. +compute_F_statistic <- function(r_squared, df1, df2) { + # df1 = k - 1, le nombre de prédicteur + # df2 = n - k, n le nombre d'observation + return(r_squared / (1 - r_squared) * df2 / df1) +} \ No newline at end of file diff --git a/img/group_nonphylo_tree.png b/img/group_nonphylo_tree.png deleted file mode 100644 index 0f13357..0000000 Binary files a/img/group_nonphylo_tree.png and /dev/null differ diff --git a/img/group_phylo_matching_tree.png b/img/group_phylo_matching_tree.png new file mode 100644 index 0000000..130f53b Binary files /dev/null and b/img/group_phylo_matching_tree.png differ diff --git a/img/group_phylo_tree.png b/img/group_phylo_tree.png deleted file mode 100644 index 3f3fdf4..0000000 Binary files a/img/group_phylo_tree.png and /dev/null differ diff --git a/img/group_random_tree.png b/img/group_random_tree.png new file mode 100644 index 0000000..e57f66a Binary files /dev/null and b/img/group_random_tree.png differ diff --git a/img/simulation_power_pure_BM.png b/img/simulation_power_pure_BM.png deleted file mode 100644 index 4c6d298..0000000 Binary files a/img/simulation_power_pure_BM.png and /dev/null differ diff --git a/img/simulation_power_pure_both_error.png b/img/simulation_power_pure_both_error.png deleted file mode 100644 index 5c71802..0000000 Binary files a/img/simulation_power_pure_both_error.png and /dev/null differ diff --git a/simulations/functions-anova.R b/simulations/functions-anova.R index bc60dfc..c63f1b2 100644 --- a/simulations/functions-anova.R +++ b/simulations/functions-anova.R @@ -6,8 +6,8 @@ overall_p <- function(my_model) { } compute_F_statistic <- function(r_squared, df1, df2) { - # df1 = k, le nombre de prédicteur - # df2 = n - (k+1), n le nombre d'observation + # df1 = k - 1, le nombre de prédicteur + # df2 = n - k, n le nombre d'observation return(r_squared / (1 - r_squared) * df2 / df1) }