🖍️Refactoring the code

✨Began implementing the inference and simulation methods as separate functions
2026-06-17 10:15:25 +02:00 · 2024-01-23 23:02:19 +01:00 · 2024-01-23 23:02:19 +01:00 · 70bdebfeb9
commit 70bdebfeb9
parent 87c2d77034
1 changed files with 87 additions and 55 deletions
--- a/R/anovaComparison.R
+++ b/R/anovaComparison.R
@ -138,6 +138,90 @@ phyloanova_anova_pvalues <- function(
        phylo_df2 = df2
    )
 }
 # TODO Separate the simulation and inference functions
 #' Fit a classical ANOVA and a phylogenetic ANOVA on the same data
 #'
 #' @param y vector of trait values used as the response of both models
 #' @param groups grouping variable used as the single predictor
 #' @param tree phylo tree handed to `phylolm()` as `phy`
 #' @param stoch_process stochastic process handed to `phylolm()` as `model`
 #'   (defaults to "BM")
 #'
 #' @return a list with two elements: `anova`, the `lm()` fit, and
 #'   `phyloanova`, the `phylolm()` fit
 #'
 infere_anova_phyloanova <- function(y, groups, tree, stoch_process = "BM") {
    # Both models share the formula y ~ groups; only the second one uses the tree
    list(
        anova = lm(y ~ groups),
        phyloanova = phylolm(y ~ groups, phy = tree, model = stoch_process)
    )
 }
 #' Return pvalues for the anova and the phyloanova
 #'
 #' @param fit_anova the `lm` fit of the classical ANOVA
 #' @param fit_phylolm the `phylolm` fit of the phylogenetic ANOVA
 #' @param tree the phylo tree; used by the "satterthwaite" method and to refit
 #'   the intercept-only model of the "lrt" method
 #' @param tested_method how the phylolm p-value is computed:
 #'   "vanilla" (F test with df2 = nb_species - K),
 #'   "satterthwaite" (F test with df2 given by `ddf_satterthwaite_sum()`) or
 #'   "lrt" (likelihood ratio test against an intercept-only phylolm)
 #' @param REML forwarded to `ddf_satterthwaite_sum()`; only used by the
 #'   "satterthwaite" method. NOTE(review): this used to be read from an
 #'   undefined free variable, confirm that FALSE is the wanted default
 #'
 #' @return a one-row data.frame with the columns `tested_method`,
 #'   `pvalue_anova`, `pvalue_phylolm`, `anova_df1`, `anova_df2`,
 #'   `phylolm_df1` and `phylolm_df2`
 pvalues_from_fits <- function(
    fit_anova,
    fit_phylolm, tree,
    tested_method = c("vanilla", "satterthwaite", "lrt"),
    REML = FALSE) {
    #  match.arg() only returns the validated choice: it has to be assigned,
    #  otherwise the default 3-element vector reaches switch() and fails
    tested_method <- match.arg(tested_method)
    #  TRUE when `value` cannot be a usable (strictly positive, finite) variance
    invalid_value <- function(value) {
        #  NULL / empty must be tested first: is.nan(NULL) is logical(0),
        #  which `||` refuses as an operand
        return(is.null(value) ||
            length(value) == 0 ||
            is.na(value) ||
            is.infinite(value) ||
            value == 0)
    }
    #  The default degrees of freedom, read from the fit itself instead of
    #  relying on global variables: n observations, d columns in the design
    nb_species <- fit_phylolm$n
    K <- fit_phylolm$d
    df1 <- K - 1
    df2 <- nb_species - K
    #  fstatistic is c(value, numdf, dendf)
    anova_fstatistic <- summary(fit_anova)$fstatistic
    anova_F_stat <- anova_fstatistic[1]
    anova_df1 <- anova_fstatistic[2]
    anova_df2 <- anova_fstatistic[3]
    pvalue_anova <- pvalue_F_test(anova_F_stat,
        df1 = anova_df1, df2 = anova_df2
    )
    #  F test of the phylolm for a given denominator df.
    #  NOTE(review): assumes compute_F_statistic() returns the F statistic (as
    #  its name says), which still has to be turned into a p-value
    phylolm_F_test_pvalue <- function(df2) {
        phylolm_F_stat <- compute_F_statistic(
            r_squared = fit_phylolm$r.squared,
            df1 = df1,
            df2 = df2
        )
        return(pvalue_F_test(phylolm_F_stat, df1 = df1, df2 = df2))
    }
    switch(tested_method,
        "vanilla" = {
            pvalue_phylolm <- phylolm_F_test_pvalue(df2)
        },
        "satterthwaite" = {
            #  Only the denominator df changes compared to "vanilla"
            df2 <- ddf_satterthwaite_sum(
                fit_phylolm = fit_phylolm, phylo = tree, REML = REML
            )
            pvalue_phylolm <- phylolm_F_test_pvalue(df2)
        },
        "lrt" = {
            #  Null model: intercept only, same tree and same process
            h0_phylolm <- phylolm(fit_phylolm$y ~ 1,
                phy = tree,
                model = fit_phylolm$model,
                #  Fit a measurement error under H0 only if the full model
                #  has a usable (non-zero) one
                measurement_error = !invalid_value(fit_phylolm$sigma2_error)
            )
            lambda_ratio_stat <- -2 * (h0_phylolm$logLik - fit_phylolm$logLik)
            #  Computes the pvalue from the statistic, with df1 = K - 1.
            #  lower.tail = FALSE is more accurate than 1 - pchisq() in the tail
            pvalue_phylolm <- pchisq(lambda_ratio_stat,
                df = df1, lower.tail = FALSE
            )
        }
    )
    return(data.frame(
        tested_method = tested_method,
        pvalue_anova = pvalue_anova,
        pvalue_phylolm = pvalue_phylolm,
        anova_df1 = anova_df1,
        anova_df2 = anova_df2,
        phylolm_df1 = df1,
        phylolm_df2 = df2
    ))
 }
 simulate_matching_and_random <- function(
    id, base_values,
@ -161,7 +245,7 @@ simulate_matching_and_random <- function(
        test_method = test_method, measurement_error = TRUE
    )
    matching_pvalues <- matching_pval_df[c(1, 2)]
-    
+
    matching_df2 <- matching_pval_df[c(3, 4)]
    random_groups_traits <- compute_trait_values(
@ -176,9 +260,9 @@ simulate_matching_and_random <- function(
        groups = random_groups, tree, stoch_process = stoch_process,
        test_method = test_method, measurement_error = TRUE
    )
-    random_groups_pvalues <- random_groups_pval_df2[c(1,2)]
+    random_groups_pvalues <- random_groups_pval_df2[c(1, 2)]
-    random_groups_df2 <- random_groups_pval_df2[c(3,4)]
+    random_groups_df2 <- random_groups_pval_df2[c(3, 4)]
    # Concatenate pvalues
    pvalues <- c(unlist(matching_pvalues), unlist(random_groups_pvalues))
@ -241,7 +325,6 @@ compare_methods <- function(
        stop("Unknown method to test.")
    }
    #  Generating data for each method
    # TODO Séparer les deux fonctions de simulation et d'inférence 
    # TODO Utiliser les mêmes données pour les méthodes
    ##  To compute power
    full_power_data <-
@ -316,54 +399,6 @@ plot_simulation_data <- function(data, parameters_string, threshold = 0.95) {
    return(p)
 }
 # # Vanilla
 # vanilla_results <- simulate_data(N, base_values, risk_threshold, sigma2_phylo,
 #     sigma2_measure, stoch_process,
 #     test_method = "vanilla"
 # )
 # vanilla_data <- vanilla_results$data
 # vanilla_parameters_string <- vanilla_results$parameters_string
 # plot_simulation_data(vanilla_data, vanilla_parameters_string)
 # vanilla_results_H0 <- simulate_data(N,
 #     base_values = c(1, 1), risk_threshold, sigma2_phylo,
 #     sigma2_measure, stoch_process,
 #     test_method = "vanilla",
 #     correct_hypothesis = "H0"
 # )
 # vanilla_data_H0 <- vanilla_results_H0$data
 # vanilla_parameters_string_H0 <- vanilla_results_H0$parameters_string
 # plot_simulation_data(vanilla_data_H0, vanilla_parameters_string_H0, threshold = 0.05)
 # # Satterthwaite
 # satterthwaite_results <- simulate_data(N, base_values, risk_threshold, sigma2_phylo,
 #     sigma2_measure = 1, stoch_process,
 #     test_method = "satterthwaite"
 # )
 # satterthwaite_data <- satterthwaite_results$data
 # satterthwaite_parameters_string <- satterthwaite_results$parameters_string
 # plot_simulation_data(satterthwaite_data, satterthwaite_parameters_string)
 # satterthwaite_results_H0 <- simulate_data(N,
 #     base_values = c(1, 1), risk_threshold, sigma2_phylo,
 #     sigma2_measure = 1, stoch_process,
 #     test_method = "satterthwaite", correct_hypothesis = "H0"
 # )
 # satterthwaite_data_H0 <- satterthwaite_results_H0$data
 # satterthwaite_parameters_string_H0 <- satterthwaite_results_H0$parameters_string
 # plot_simulation_data(satterthwaite_data_H0, satterthwaite_parameters_string_H0, threshold = 0.05)
 # # Likelihood ratio
 # lrt_results <- simulate_data(N, base_values, risk_threshold, sigma2_phylo,
 #     sigma2_measure, stoch_process,
 #     test_method = "lrt"
 # )
 # lrt_data <- lrt_results$data
 # lrt_parameters_string <- lrt_results$parameters_string
 # plot_simulation_data(lrt_data, lrt_parameters_string)
 plot_comparison <- function(data, sim_parameters) {
    #  Retrieving simulation parameters
    risk_threshold <- sim_parameters$risk_threshold
@ -424,7 +459,6 @@ plot_comparison <- function(data, sim_parameters) {
 # plot_comparison(comparison_data, sim_parameters = comparison$sim_parameters)
 #  TODO Adapt to the current code
 ## Standardized parameters
 total_variance <- 1.0 # sigma2_phylo + sigma2_error, fixed [as tree_height = 1]
 heri <- c(0.0, 0.25, 0.5, 1.0) # heritability her = sigma2_phylo / total_variance. 0 means only noise. 1 means only phylo.
@ -442,5 +476,3 @@ for (her in heri) {
    res_sim_plot
    ggsave(paste0("img/simulation_BM_her_", her, ".png"), plot = res_sim_plot)
 }