Began reimplementing properly

This commit is contained in:
Louis Lacoste 2024-01-11 18:13:26 +01:00
parent b5f8b6ea26
commit 6e80eff1a4
9 changed files with 209 additions and 2 deletions

139
R/anovaComparison.R Normal file
View file

@ -0,0 +1,139 @@
# Phylocomparison tools
library(phylolm)
library(phylotools)
library(phytools)
library(phylolimma)
library(ape)
# Plotting
library(ggplot2)
# Sourcing the utils
source("./R/utils.R")
# Fixing randomness for reproducibility
set.seed(1234)
# Parameters
nb_species <- 100
# Generating the phylo tree
tree <- rphylo(nb_species, birth = 0.1, death = 0)
# Group selections tries to have group of same size
plotTree(tree, node.numbers = TRUE)
# Here I chose two ancestors to split in two the tree
ancestors <- c(102, 104)
K <- length(ancestors) # The number of groups
# I assign the groups numbers
## Matching the phylogeny
phylo_matching_groups <- sapply(1:nb_species, function(tip) {
get_phylo_group(tip,
tree,
ancestors = ancestors
)
})
## Randomly
random_groups <- sample(1:K, nb_species, replace = TRUE)
## Randomly but with same size of groups
# sameSize_random_groups <- sample(1:K,
# nb_species,
# replace = TRUE,
# prob = table(phylo_matching_groups)
# )
# group_sizes <- table(phylo_matching_groups)
# Saving images of tree
plot_group_on_tree <- function(tree, groups) {
plot(tree, show.tip.label = FALSE, x.lim = 50)
tiplabels(bg = groups, pch = 21)
text(x = 10, y = 0, label = "This tree will be normalised.")
}
# Saving trees
png(file = "img/group_phylo_matching_tree.png")
plot_group_on_tree(tree, group = phylo_matching_groups)
dev.off()
png(file = "img/group_random_tree.png")
plot_group_on_tree(tree, group = random_groups)
dev.off()
# Normalising tree edge length
taille_tree <- diag(vcv(tree))[1]
tree$edge.length <- tree$edge.length / taille_tree
# Compute traits
# Parameters
base_values <- c(1, 2.1) # The base trait to add
sigma2_phylo <- 1
sigma2_measure <- 0
stoch_process <- "BM"
hypo_test <- "satterthwaite" #"vanilla" # "satterthwaite", "likelihood_ratio"
matching_phylo_traits <- compute_trait_values(
groups = phylo_matching_groups,
base_values = base_values, tree,
sigma2_phylo = sigma2_phylo, sigma2_measure = sigma2_measure,
stoch_process = stoch_process
)
random_traits <- compute_trait_values(
groups = random_groups,
base_values = base_values, tree,
sigma2_phylo = sigma2_phylo, sigma2_measure = sigma2_measure,
stoch_process = stoch_process
)
# For phylo matching
matching_anova <- lm(matching_phylo_traits ~ phylo_matching_groups)
# TODO Handling the stoch process and model for phylolm
model <- stoch_process
matching_phyloanova <- phylolm(matching_phylo_traits ~ phylo_matching_groups,
phy = tree,
model = model,
measurement_error = TRUE # To let phylolm know if there's measurement error
)
matching_phyloanova$REML = FALSE
if (hypo_test %in% c("vanilla", "satterthwaite")) {
phyloanova_F_stat <- compute_F_statistic(
r_squared = matching_phyloanova$r.squared,
df1 = K - 1,
df2 = nb_species - K
)
df1 <- K - 1
df2 <- nb_species - K
if (hypo_test == "satterthwaite") {
df2 <- phylolimma:::ddf_satterthwaite(matching_phyloanova, tree)
}
p_value <- 1 - pf(phyloanova_F_stat, df1 = df1, df2 = df2)
}
if (hypo_test == "likelihood_ratio") {
# How to obtain the loglikehood under H0 ?
# TODO Find the correct way to do this
# I assume that under H0 this is like saying everyone is from the same group
h0_matching_phyloanova <- phylolm(matching_phylo_traits ~ rep(1, nb_species),
phy = tree,
model = model,
measurement_error = (sigma2_measure != 0) # To let phylolm know if there's measurement error
)
# But this gives a LAPACK error, the system is not inversible.
lambda_ratio_stat <- -2(h0_matching_phyloanova$logLik - matching_phyloanova$logLik)
}
p_value

68
R/utils.R Normal file
View file

@ -0,0 +1,68 @@
#' Get the group number of a tip of the given tree
#'
#' @description
#' Returns the group number (based on the number of ancestors provided)
#' for the given tip
get_phylo_group <- function(tip, tree, ancestors = c(102, 104)) {
# Sanity checks
if (!all(ancestors %in% tree$edge[, 1])) {
stop("At least one ancestor is not in the given tree")
}
if (!paste0("t", tip) %in% tree$tip.label) {
stop("Provided tip is not in the tree.")
}
for (ancestor_id in seq_along(ancestors)) {
if (tip %in% getDescendants(tree, ancestors[ancestor_id])) {
# If the tip is a descendant of this ancestor return its index
return(ancestor_id)
}
}
# If we reach this line the tip is not part of the given ancestors
warning(paste0(
"The tip ", tip,
" is not a descendant of the provided ancestors"
))
}
#' Compute trait values for the given groups
#'
#' @description
#' For each value of group, the base value is matched in the order of
#' the vector (1st value for 1st group and etc).
#' Then the phylogenetic trait corresponding from the tree is computed.
#' And finally the error (gaussian centered) is computed.
#' The sum is returned
compute_trait_values <- function(
groups,
base_values,
tree,
sigma2_phylo,
sigma2_measure,
stoch_process = "BM") {
# Here we compute the base values for each of the species
trait <- rowSums(sapply(seq_along(base_values), function(i) base_values[i] * (groups == i)))
# The phylogenetic induced value
trait_phylo <- rTrait(1, tree,
stoch_process,
parameters = list(sigma2 = sigma2_phylo)
)
# The measure error
trait_error <- rnorm(length(groups), mean = 0, sd = sqrt(sigma2_measure))
return(trait + trait_phylo + trait_error)
}
#' Computes the F_statistic from the r_squared
#'
#' @description
#' Use the formula between r_squared and df1 (K-1), df2 (n - K) to return the
#' F statistic to test against the Fisher distribution.
compute_F_statistic <- function(r_squared, df1, df2) {
# df1 = k - 1, le nombre de prédicteur
# df2 = n - k, n le nombre d'observation
return(r_squared / (1 - r_squared) * df2 / df1)
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

BIN
img/group_random_tree.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 102 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 101 KiB

View file

@ -6,8 +6,8 @@ overall_p <- function(my_model) {
}
compute_F_statistic <- function(r_squared, df1, df2) {
# df1 = k, le nombre de prédicteur
# df2 = n - (k+1), n le nombre d'observation
# df1 = k - 1, le nombre de prédicteur
# df2 = n - k, n le nombre d'observation
return(r_squared / (1 - r_squared) * df2 / df1)
}