mirror of
https://github.com/Polarolouis/anova-phylogenetique-projet-msv.git
synced 2026-06-17 18:25:25 +02:00
Ajout des tests d'hypo fonctionnels
This commit is contained in:
parent
e62b560e0a
commit
326d2d3359
3 changed files with 106 additions and 10 deletions
|
|
@ -41,5 +41,5 @@ ggplot(multiple_BM) +
|
||||||
# For phylogenic tree
|
# For phylogenic tree
|
||||||
|
|
||||||
generate_phylo_tree <- function(n_tips, max_time) {
|
generate_phylo_tree <- function(n_tips, max_time) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -5,7 +5,7 @@ library(ape)
|
||||||
set.seed(1234)
|
set.seed(1234)
|
||||||
|
|
||||||
N <- 100 # Number of different simulations
|
N <- 100 # Number of different simulations
|
||||||
|
n <- 100
|
||||||
# Preparing output lists
|
# Preparing output lists
|
||||||
# simulation_results <- data.frame(
|
# simulation_results <- data.frame(
|
||||||
# sim_id = numeric(),
|
# sim_id = numeric(),
|
||||||
|
|
@ -25,6 +25,7 @@ N <- 100 # Number of different simulations
|
||||||
tree <- rphylo(n, 0.1, 0)
|
tree <- rphylo(n, 0.1, 0)
|
||||||
|
|
||||||
## Groupes
|
## Groupes
|
||||||
|
K <- 3
|
||||||
get_group <- function(tip) {
|
get_group <- function(tip) {
|
||||||
if (tip %in% getDescendants(tree, 105)) {
|
if (tip %in% getDescendants(tree, 105)) {
|
||||||
return(2)
|
return(2)
|
||||||
|
|
@ -46,6 +47,17 @@ overall_p <- function(my_model) {
|
||||||
return(p)
|
return(p)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
compute_F_statistic <- function(r_squared, df1, df2) {
|
||||||
|
# df1 = k, le nombre de prédicteur
|
||||||
|
# df2 = n - (k+1), n le nombre d'observation
|
||||||
|
return(r_squared / (1 - r_squared) * df2 / df1)
|
||||||
|
}
|
||||||
|
|
||||||
|
phylo_p_value <- function(r_squared, df1, df2) {
|
||||||
|
F_stat <- compute_F_statistic(r_squared, df1, df2)
|
||||||
|
return(1 - pf(F_stat, K - 1, n - K))
|
||||||
|
}
|
||||||
|
|
||||||
compute_y <- function(mu_vect, groups) {
|
compute_y <- function(mu_vect, groups) {
|
||||||
rowSums(sapply(seq_along(mu_vect), function(i) mu_vect[i] * (groups == i)))
|
rowSums(sapply(seq_along(mu_vect), function(i) mu_vect[i] * (groups == i)))
|
||||||
}
|
}
|
||||||
|
|
@ -54,11 +66,11 @@ compute_y <- function(mu_vect, groups) {
|
||||||
# TODO : Refaire avec un Ornhstein-Uhlenbeck
|
# TODO : Refaire avec un Ornhstein-Uhlenbeck
|
||||||
|
|
||||||
# Code for one simulation
|
# Code for one simulation
|
||||||
simulate_ <- function(sim_id,
|
simulate_ANOVAs <- function(sim_id,
|
||||||
groups,
|
groups,
|
||||||
|
tree,
|
||||||
n = 100,
|
n = 100,
|
||||||
stoch_process = "BM",
|
stoch_process = "BM",
|
||||||
tree = tree,
|
|
||||||
mu_vect = c(2, -5, 2),
|
mu_vect = c(2, -5, 2),
|
||||||
risk_threshold = 0.05,
|
risk_threshold = 0.05,
|
||||||
sub_branches = 0,
|
sub_branches = 0,
|
||||||
|
|
@ -98,29 +110,44 @@ simulate_ <- function(sim_id,
|
||||||
## faire scénario H_0: mu egaux -> ANOVA se plante car dep entre les indivs
|
## faire scénario H_0: mu egaux -> ANOVA se plante car dep entre les indivs
|
||||||
## faire scenario H_1: mu differents -> supp ANOVA phylo se plante car pas de dep entre indiv
|
## faire scenario H_1: mu differents -> supp ANOVA phylo se plante car pas de dep entre indiv
|
||||||
|
|
||||||
methods <- as.factor(c("ANOVA", "ANOVA-Phylo"))
|
tested_methods <- as.factor(c("ANOVA", "ANOVA-Phylo"))
|
||||||
|
|
||||||
if(is_H0){
|
if(is_H0){
|
||||||
correct_hypothesis <- rep("H0", 2)
|
correct_hypothesis <- rep("H0", 2)
|
||||||
|
|
||||||
has_selected_correctly <- c(
|
has_selected_correctly <- c(
|
||||||
overall_p(summary(fit_ANOVA)) > risk_threshold,
|
overall_p(fit_ANOVA) > risk_threshold,
|
||||||
overall_p(summary(fitphy_ANOVA)) > risk_threshold
|
phylo_p_value(fitphy_ANOVA$r.squared, n - K, K - 1) > risk_threshold
|
||||||
)
|
)
|
||||||
|
selected_hypothesis <- sapply(1:2, function(id) {
|
||||||
|
if (has_selected_correctly[id]) {
|
||||||
|
return("H0")
|
||||||
|
} else {
|
||||||
|
return("H1")
|
||||||
|
}
|
||||||
|
})
|
||||||
} else {
|
} else {
|
||||||
correct_hypothesis <- rep("H1", 2)
|
correct_hypothesis <- rep("H1", 2)
|
||||||
|
|
||||||
# If the p_value is below the risk_threshold the H0 is rejected
|
# If the p_value is below the risk_threshold the H0 is rejected
|
||||||
has_selected_correctly <- c(
|
has_selected_correctly <- c(
|
||||||
overall_p(summary(fit_ANOVA)) <= risk_threshold,
|
overall_p(fit_ANOVA) <= risk_threshold,
|
||||||
overall_p(summary(fitphy_ANOVA)) <= risk_threshold
|
phylo_p_value(fitphy_ANOVA$r.squared, n - K, K - 1) <= risk_threshold
|
||||||
)
|
)
|
||||||
|
selected_hypothesis <- sapply(1:2, function(id) {
|
||||||
|
if (has_selected_correctly[id]) {
|
||||||
|
return("H1")
|
||||||
|
}else{
|
||||||
|
return("H0")
|
||||||
|
}
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
results <- data.frame(
|
results <- data.frame(
|
||||||
sim_id = rep(sim_id, 2),
|
sim_id = rep(sim_id, 2),
|
||||||
methods = methods,
|
tested_methods = tested_methods,
|
||||||
correct_hypothesis = correct_hypothesis,
|
correct_hypothesis = correct_hypothesis,
|
||||||
|
selected_hypothesis = selected_hypothesis,
|
||||||
has_selected_correctly = has_selected_correctly
|
has_selected_correctly = has_selected_correctly
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
69
sources/fisher.R
Normal file
69
sources/fisher.R
Normal file
|
|
@ -0,0 +1,69 @@
|
||||||
|
library(ape)
|
||||||
|
library(phylolm)
|
||||||
|
library(phytools)
|
||||||
|
|
||||||
|
set.seed(4568)
|
||||||
|
|
||||||
|
## Tree
|
||||||
|
n <- 30
|
||||||
|
tree <- rphylo(n, birth = 0.5, death = 0)
|
||||||
|
|
||||||
|
plot(tree, show.tip.label = FALSE, no.margin = TRUE)
|
||||||
|
nodelabels()
|
||||||
|
|
||||||
|
# groups
|
||||||
|
K <- 3
|
||||||
|
get_group <- function(tip) {
|
||||||
|
if (tip %in% getDescendants(tree, 34)) {
|
||||||
|
return(2)
|
||||||
|
}
|
||||||
|
if (tip %in% getDescendants(tree, 38)) {
|
||||||
|
return(3)
|
||||||
|
}
|
||||||
|
return(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
group <- as.factor(sapply(1:n, get_group))
|
||||||
|
|
||||||
|
plot(tree, show.tip.label = FALSE)
|
||||||
|
tiplabels(bg = group, pch = 21)
|
||||||
|
|
||||||
|
# Trait under H0
|
||||||
|
y <- 2.0 + rTrait(n = 1, phy = tree, model = "BM",
|
||||||
|
parameters = list(acestral.state = 0, sigma2 = 1))
|
||||||
|
|
||||||
|
|
||||||
|
# phylolm fit
|
||||||
|
fit <- phylolm(y ~ group, phy = tree)
|
||||||
|
summary(fit)
|
||||||
|
|
||||||
|
# Fisher (naive version : uses the inverse of V, while phylolm never computes it.
|
||||||
|
V <- vcv(tree, model = "BM")
|
||||||
|
Vinv <- solve(V)
|
||||||
|
F_stat_naive <- t(fit$fitted.values - mean(y)) %*% Vinv %*% (fit$fitted.values - mean(y))/(K-1)
|
||||||
|
F_stat_naive <- F_stat_naive / (t(y - fit$fitted.values) %*% Vinv %*% (y - fit$fitted.values)/(n-K))
|
||||||
|
F_stat_naive
|
||||||
|
|
||||||
|
# Fisher (using r squared)
|
||||||
|
F_stat <- fit$r.squared / (1 - fit$r.squared) * (n - K) / (K - 1)
|
||||||
|
F_stat
|
||||||
|
|
||||||
|
# p-value
|
||||||
|
p_value <- 1 - pf(F_stat, K - 1, n - K)
|
||||||
|
p_value
|
||||||
|
|
||||||
|
## Check with star tree: phylolm and lm should give the same result
|
||||||
|
tree <- stree(n)
|
||||||
|
tree$edge.length <- rep(1.0, nrow(tree$edge))
|
||||||
|
plot(tree)
|
||||||
|
|
||||||
|
# phylo
|
||||||
|
fit <- phylolm(y ~ group, phy = tree)
|
||||||
|
F_stat <- fit$r.squared / (1 - fit$r.squared) * (n - K) / (K - 1)
|
||||||
|
1 - pf(F_stat, K - 1, n - K)
|
||||||
|
|
||||||
|
# non phylo
|
||||||
|
fit <- lm(y ~ group)
|
||||||
|
aa <- anova(fit)
|
||||||
|
aa$`F value`
|
||||||
|
aa$`Pr(>F)`
|
||||||
Loading…
Add table
Reference in a new issue