Ajout des tests d'hypo fonctionnels

2026-06-17 10:15:25 +02:00 · 2024-01-04 10:35:14 +01:00 · 2024-01-04 10:35:14 +01:00 · 326d2d3359
commit 326d2d3359
parent e62b560e0a
3 changed files with 106 additions and 10 deletions
--- a/simulations/mouvement_brownien.R
+++ b/simulations/mouvement_brownien.R
@ -41,5 +41,5 @@ ggplot(multiple_BM) +
 # For phylogenic tree

 generate_phylo_tree <- function(n_tips, max_time) {
-
+    
 }
--- a/simulations/test-anova.phylo.R
+++ b/simulations/test-anova.phylo.R
@ -5,7 +5,7 @@ library(ape)
 set.seed(1234)

 N <- 100 # Number of different simulations
-
+n <- 100
 # Preparing output lists
 # simulation_results <- data.frame(
 #     sim_id = numeric(),
@ -25,6 +25,7 @@ N <- 100 # Number of different simulations
 tree <- rphylo(n, 0.1, 0)

 ## Groupes
+K <- 3
 get_group <- function(tip) {
    if (tip %in% getDescendants(tree, 105)) {
        return(2)
@ -46,6 +47,17 @@ overall_p <- function(my_model) {
    return(p)
 }

+compute_F_statistic <- function(r_squared, df1, df2) {
+    # df1 = k, le nombre de prédicteur
+    # df2 = n - (k+1), n le nombre d'observation
+    return(r_squared / (1 - r_squared) * df2 / df1)
+}
+
+phylo_p_value <- function(r_squared, df1, df2) {
+    F_stat <- compute_F_statistic(r_squared, df1, df2)
+    return(1 - pf(F_stat, K - 1, n - K))
+}
+
 compute_y <- function(mu_vect, groups) {
    rowSums(sapply(seq_along(mu_vect), function(i) mu_vect[i] * (groups == i)))
 }
@ -54,11 +66,11 @@ compute_y <- function(mu_vect, groups) {
 # TODO : Refaire avec un Ornhstein-Uhlenbeck

 # Code for one simulation
-simulate_ <- function(sim_id,
+simulate_ANOVAs <- function(sim_id,
    groups,
+    tree,
    n = 100,
    stoch_process = "BM",
-    tree = tree,
    mu_vect = c(2, -5, 2),
    risk_threshold = 0.05,
    sub_branches = 0,
@ -98,29 +110,44 @@ simulate_ <- function(sim_id,
    ## faire scénario H_0: mu egaux -> ANOVA se plante car dep entre les indivs
    ## faire scenario H_1: mu differents -> supp ANOVA phylo se plante car pas de dep entre indiv

-    methods <- as.factor(c("ANOVA", "ANOVA-Phylo"))
+    tested_methods <- as.factor(c("ANOVA", "ANOVA-Phylo"))
    
    if(is_H0){
        correct_hypothesis <- rep("H0", 2)
        
        has_selected_correctly <- c(
-            overall_p(summary(fit_ANOVA)) > risk_threshold,
-            overall_p(summary(fitphy_ANOVA)) > risk_threshold
+            overall_p(fit_ANOVA) > risk_threshold,
+            phylo_p_value(fitphy_ANOVA$r.squared, n - K, K - 1) > risk_threshold
        )
+        selected_hypothesis <- sapply(1:2, function(id) {
+            if (has_selected_correctly[id]) {
+                return("H0")
+            } else {
+                return("H1")
+            }
+        })
    } else {
        correct_hypothesis <- rep("H1", 2)

        # If the p_value is below the risk_threshold the H0 is rejected
        has_selected_correctly <- c(
-            overall_p(summary(fit_ANOVA)) <= risk_threshold,
-            overall_p(summary(fitphy_ANOVA)) <= risk_threshold
+            overall_p(fit_ANOVA) <= risk_threshold,
+            phylo_p_value(fitphy_ANOVA$r.squared, n - K, K - 1) <= risk_threshold
        )
+        selected_hypothesis <- sapply(1:2, function(id) {
+            if (has_selected_correctly[id]) {
+                return("H1")
+            }else{
+                return("H0")
+            }
+        })
    }

    results <- data.frame(
        sim_id = rep(sim_id, 2),
-        methods = methods,
+        tested_methods = tested_methods,
        correct_hypothesis = correct_hypothesis,
+        selected_hypothesis = selected_hypothesis,
        has_selected_correctly = has_selected_correctly
    )

--- a/sources/fisher.R
+++ b/sources/fisher.R
@ -0,0 +1,69 @@
+library(ape)
+library(phylolm)
+library(phytools)
+
+set.seed(4568)
+
+## Tree
+n <- 30
+tree <- rphylo(n, birth = 0.5, death = 0)
+
+plot(tree, show.tip.label = FALSE, no.margin = TRUE)
+nodelabels()
+
+# groups
+K <- 3
+get_group <- function(tip) {
+  if (tip %in% getDescendants(tree, 34)) {
+    return(2)
+  }
+  if (tip %in% getDescendants(tree, 38)) {
+    return(3)
+  }
+  return(1)
+}
+
+group <- as.factor(sapply(1:n, get_group))
+
+plot(tree, show.tip.label = FALSE)
+tiplabels(bg = group, pch = 21)
+
+# Trait under H0
+y <- 2.0 + rTrait(n = 1, phy = tree, model = "BM",
+                  parameters = list(acestral.state = 0, sigma2 = 1))
+
+
+# phylolm fit
+fit <- phylolm(y ~ group, phy = tree)
+summary(fit)
+
+# Fisher (naive version : uses the inverse of V, while phylolm never computes it.
+V <- vcv(tree, model = "BM")
+Vinv <- solve(V)
+F_stat_naive <- t(fit$fitted.values - mean(y)) %*% Vinv %*% (fit$fitted.values - mean(y))/(K-1)
+F_stat_naive <- F_stat_naive / (t(y - fit$fitted.values) %*% Vinv %*% (y - fit$fitted.values)/(n-K))
+F_stat_naive
+
+# Fisher (using r squared)
+F_stat <- fit$r.squared / (1 - fit$r.squared) * (n - K) / (K - 1)
+F_stat
+
+# p-value
+p_value <- 1 - pf(F_stat, K - 1, n - K)
+p_value
+
+## Check with star tree: phylolm and lm should give the same result
+tree <- stree(n)
+tree$edge.length <- rep(1.0, nrow(tree$edge))
+plot(tree)
+
+# phylo
+fit <- phylolm(y ~ group, phy = tree)
+F_stat <- fit$r.squared / (1 - fit$r.squared) * (n - K) / (K - 1)
+1 - pf(F_stat, K - 1, n - K)
+
+# non phylo
+fit <- lm(y ~ group)
+aa <- anova(fit)
+aa$`F value`
+aa$`Pr(>F)`