mirror of
https://github.com/Polarolouis/anova-phylogenetique-projet-msv.git
synced 2026-06-17 10:15:25 +02:00
Began reimplementing properly
This commit is contained in:
parent
b5f8b6ea26
commit
6e80eff1a4
9 changed files with 209 additions and 2 deletions
139
R/anovaComparison.R
Normal file
139
R/anovaComparison.R
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
# Phylocomparison tools
|
||||
library(phylolm)
|
||||
library(phylotools)
|
||||
library(phytools)
|
||||
library(phylolimma)
|
||||
library(ape)
|
||||
|
||||
# Plotting
|
||||
library(ggplot2)
|
||||
|
||||
# Sourcing the utils
|
||||
source("./R/utils.R")
|
||||
|
||||
# Fixing randomness for reproducibility
|
||||
set.seed(1234)
|
||||
|
||||
# Parameters
|
||||
nb_species <- 100
|
||||
|
||||
# Generating the phylo tree
|
||||
tree <- rphylo(nb_species, birth = 0.1, death = 0)
|
||||
|
||||
# Group selections tries to have group of same size
|
||||
plotTree(tree, node.numbers = TRUE)
|
||||
|
||||
# Here I chose two ancestors to split in two the tree
|
||||
ancestors <- c(102, 104)
|
||||
K <- length(ancestors) # The number of groups
|
||||
|
||||
# I assign the groups numbers
|
||||
## Matching the phylogeny
|
||||
phylo_matching_groups <- sapply(1:nb_species, function(tip) {
|
||||
get_phylo_group(tip,
|
||||
tree,
|
||||
ancestors = ancestors
|
||||
)
|
||||
})
|
||||
|
||||
## Randomly
|
||||
random_groups <- sample(1:K, nb_species, replace = TRUE)
|
||||
|
||||
## Randomly but with same size of groups
|
||||
# sameSize_random_groups <- sample(1:K,
|
||||
# nb_species,
|
||||
# replace = TRUE,
|
||||
# prob = table(phylo_matching_groups)
|
||||
# )
|
||||
# group_sizes <- table(phylo_matching_groups)
|
||||
|
||||
|
||||
# Saving images of tree
|
||||
plot_group_on_tree <- function(tree, groups) {
|
||||
plot(tree, show.tip.label = FALSE, x.lim = 50)
|
||||
tiplabels(bg = groups, pch = 21)
|
||||
text(x = 10, y = 0, label = "This tree will be normalised.")
|
||||
}
|
||||
|
||||
# Saving trees
|
||||
png(file = "img/group_phylo_matching_tree.png")
|
||||
plot_group_on_tree(tree, group = phylo_matching_groups)
|
||||
dev.off()
|
||||
|
||||
png(file = "img/group_random_tree.png")
|
||||
plot_group_on_tree(tree, group = random_groups)
|
||||
dev.off()
|
||||
|
||||
# Normalising tree edge length
|
||||
taille_tree <- diag(vcv(tree))[1]
|
||||
tree$edge.length <- tree$edge.length / taille_tree
|
||||
|
||||
# Compute traits
|
||||
# Parameters
|
||||
base_values <- c(1, 2.1) # The base trait to add
|
||||
sigma2_phylo <- 1
|
||||
sigma2_measure <- 0
|
||||
stoch_process <- "BM"
|
||||
hypo_test <- "satterthwaite" #"vanilla" # "satterthwaite", "likelihood_ratio"
|
||||
|
||||
|
||||
matching_phylo_traits <- compute_trait_values(
|
||||
groups = phylo_matching_groups,
|
||||
base_values = base_values, tree,
|
||||
sigma2_phylo = sigma2_phylo, sigma2_measure = sigma2_measure,
|
||||
stoch_process = stoch_process
|
||||
)
|
||||
|
||||
random_traits <- compute_trait_values(
|
||||
groups = random_groups,
|
||||
base_values = base_values, tree,
|
||||
sigma2_phylo = sigma2_phylo, sigma2_measure = sigma2_measure,
|
||||
stoch_process = stoch_process
|
||||
)
|
||||
|
||||
# For phylo matching
|
||||
matching_anova <- lm(matching_phylo_traits ~ phylo_matching_groups)
|
||||
|
||||
# TODO Handling the stoch process and model for phylolm
|
||||
model <- stoch_process
|
||||
|
||||
matching_phyloanova <- phylolm(matching_phylo_traits ~ phylo_matching_groups,
|
||||
phy = tree,
|
||||
model = model,
|
||||
measurement_error = TRUE # To let phylolm know if there's measurement error
|
||||
)
|
||||
|
||||
matching_phyloanova$REML = FALSE
|
||||
|
||||
if (hypo_test %in% c("vanilla", "satterthwaite")) {
|
||||
phyloanova_F_stat <- compute_F_statistic(
|
||||
r_squared = matching_phyloanova$r.squared,
|
||||
df1 = K - 1,
|
||||
df2 = nb_species - K
|
||||
)
|
||||
|
||||
df1 <- K - 1
|
||||
df2 <- nb_species - K
|
||||
|
||||
if (hypo_test == "satterthwaite") {
|
||||
df2 <- phylolimma:::ddf_satterthwaite(matching_phyloanova, tree)
|
||||
}
|
||||
|
||||
p_value <- 1 - pf(phyloanova_F_stat, df1 = df1, df2 = df2)
|
||||
}
|
||||
|
||||
if (hypo_test == "likelihood_ratio") {
|
||||
# How to obtain the loglikehood under H0 ?
|
||||
# TODO Find the correct way to do this
|
||||
# I assume that under H0 this is like saying everyone is from the same group
|
||||
h0_matching_phyloanova <- phylolm(matching_phylo_traits ~ rep(1, nb_species),
|
||||
phy = tree,
|
||||
model = model,
|
||||
measurement_error = (sigma2_measure != 0) # To let phylolm know if there's measurement error
|
||||
)
|
||||
# But this gives a LAPACK error, the system is not inversible.
|
||||
|
||||
lambda_ratio_stat <- -2(h0_matching_phyloanova$logLik - matching_phyloanova$logLik)
|
||||
}
|
||||
|
||||
p_value
|
||||
68
R/utils.R
Normal file
68
R/utils.R
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
#' Get the group number of a tip of the given tree
|
||||
#'
|
||||
#' @description
|
||||
#' Returns the group number (based on the number of ancestors provided)
|
||||
#' for the given tip
|
||||
get_phylo_group <- function(tip, tree, ancestors = c(102, 104)) {
|
||||
# Sanity checks
|
||||
if (!all(ancestors %in% tree$edge[, 1])) {
|
||||
stop("At least one ancestor is not in the given tree")
|
||||
}
|
||||
if (!paste0("t", tip) %in% tree$tip.label) {
|
||||
stop("Provided tip is not in the tree.")
|
||||
}
|
||||
|
||||
for (ancestor_id in seq_along(ancestors)) {
|
||||
if (tip %in% getDescendants(tree, ancestors[ancestor_id])) {
|
||||
# If the tip is a descendant of this ancestor return its index
|
||||
return(ancestor_id)
|
||||
}
|
||||
}
|
||||
|
||||
# If we reach this line the tip is not part of the given ancestors
|
||||
warning(paste0(
|
||||
"The tip ", tip,
|
||||
" is not a descendant of the provided ancestors"
|
||||
))
|
||||
}
|
||||
|
||||
#' Compute trait values for the given groups
|
||||
#'
|
||||
#' @description
|
||||
#' For each value of group, the base value is matched in the order of
|
||||
#' the vector (1st value for 1st group and etc).
|
||||
#' Then the phylogenetic trait corresponding from the tree is computed.
|
||||
#' And finally the error (gaussian centered) is computed.
|
||||
#' The sum is returned
|
||||
compute_trait_values <- function(
|
||||
groups,
|
||||
base_values,
|
||||
tree,
|
||||
sigma2_phylo,
|
||||
sigma2_measure,
|
||||
stoch_process = "BM") {
|
||||
# Here we compute the base values for each of the species
|
||||
trait <- rowSums(sapply(seq_along(base_values), function(i) base_values[i] * (groups == i)))
|
||||
|
||||
# The phylogenetic induced value
|
||||
trait_phylo <- rTrait(1, tree,
|
||||
stoch_process,
|
||||
parameters = list(sigma2 = sigma2_phylo)
|
||||
)
|
||||
|
||||
# The measure error
|
||||
trait_error <- rnorm(length(groups), mean = 0, sd = sqrt(sigma2_measure))
|
||||
|
||||
return(trait + trait_phylo + trait_error)
|
||||
}
|
||||
|
||||
#' Computes the F_statistic from the r_squared
|
||||
#'
|
||||
#' @description
|
||||
#' Use the formula between r_squared and df1 (K-1), df2 (n - K) to return the
|
||||
#' F statistic to test against the Fisher distribution.
|
||||
compute_F_statistic <- function(r_squared, df1, df2) {
|
||||
# df1 = k - 1, le nombre de prédicteur
|
||||
# df2 = n - k, n le nombre d'observation
|
||||
return(r_squared / (1 - r_squared) * df2 / df1)
|
||||
}
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 14 KiB |
BIN
img/group_phylo_matching_tree.png
Normal file
BIN
img/group_phylo_matching_tree.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 14 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 12 KiB |
BIN
img/group_random_tree.png
Normal file
BIN
img/group_random_tree.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 14 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 102 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 101 KiB |
|
|
@ -6,8 +6,8 @@ overall_p <- function(my_model) {
|
|||
}
|
||||
|
||||
compute_F_statistic <- function(r_squared, df1, df2) {
|
||||
# df1 = k, le nombre de prédicteur
|
||||
# df2 = n - (k+1), n le nombre d'observation
|
||||
# df1 = k - 1, le nombre de prédicteur
|
||||
# df2 = n - k, n le nombre d'observation
|
||||
return(r_squared / (1 - r_squared) * df2 / df1)
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue