mirror of
https://github.com/Polarolouis/anova-phylogenetique-projet-msv.git
synced 2026-06-17 10:15:25 +02:00
🖍️Refactoring the code
✨Began the implementation of the inference and simulation SEPARATED methods
This commit is contained in:
parent
87c2d77034
commit
70bdebfeb9
1 changed files with 87 additions and 55 deletions
|
|
@ -138,6 +138,90 @@ phyloanova_anova_pvalues <- function(
|
||||||
phylo_df2 = df2
|
phylo_df2 = df2
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
# TODO Séparer les deux fonctions de simulation et d'inférence
|
||||||
|
#' Infere an ANOVA and a phyloanova
|
||||||
|
#'
|
||||||
|
#' @param y the vector of traits for which to fit the models
|
||||||
|
#' @param groups the groups to which fit the models
|
||||||
|
#' @param tree the phylo tree to use
|
||||||
|
#' @param stoch_process the stochastic process to use for the phylolm
|
||||||
|
#'
|
||||||
|
infere_anova_phyloanova <- function(y, groups, tree, stoch_process = "BM") {
|
||||||
|
# The fits
|
||||||
|
fit_anova <- lm(y ~ groups)
|
||||||
|
fit_phylolm <- phylolm(y ~ groups, phy = tree, model = stoch_process)
|
||||||
|
return(list(anova = fit_anova, phyloanova = fit_phylolm))
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Return pvalues for the anova and the phyloanova
|
||||||
|
#'
|
||||||
|
#' @param fit_anova
|
||||||
|
#' @param fit_phylolm
|
||||||
|
pvalues_from_fits <- function(
|
||||||
|
fit_anova,
|
||||||
|
fit_phylolm, tree,
|
||||||
|
tested_method = c("vanilla", "satterthwaite", "lrt")) {
|
||||||
|
# For sanity test
|
||||||
|
match.arg(tested_method)
|
||||||
|
|
||||||
|
invalid_value <- function(value) {
|
||||||
|
return(is.nan(value) ||
|
||||||
|
is.null(value) ||
|
||||||
|
is.infinite(value) ||
|
||||||
|
value == 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
# The default value for the df2
|
||||||
|
df1 <- K - 1
|
||||||
|
df2 <- nb_species - K
|
||||||
|
|
||||||
|
anova_F_stat <- summary(fit_anova)$fstatistic[1]
|
||||||
|
anova_df1 <- summary(fit_anova)$fstatistic[2]
|
||||||
|
anova_df2 <- summary(fit_anova)$fstatistic[3]
|
||||||
|
pvalue_anova <- pvalue_F_test(anova_F_stat,
|
||||||
|
df1 = anova_df1, df2 = anova_df2
|
||||||
|
)
|
||||||
|
|
||||||
|
switch(tested_method,
|
||||||
|
"vanilla" = {
|
||||||
|
pvalue_phylolm <- compute_F_statistic(
|
||||||
|
r_squared = fit_phylolm$r.squared,
|
||||||
|
df1 = df1,
|
||||||
|
df2 = df2
|
||||||
|
)
|
||||||
|
},
|
||||||
|
"satterthwaite" = {
|
||||||
|
df2 <- ddf_satterthwaite_sum(fit_phylolm = fit_phylolm, phylo = tree, REML = REML)
|
||||||
|
pvalue_phylolm <- compute_F_statistic(
|
||||||
|
r_squared = fit_phylolm$r.squared,
|
||||||
|
df1 = df1,
|
||||||
|
df2 = df2
|
||||||
|
)
|
||||||
|
},
|
||||||
|
"lrt" = {
|
||||||
|
h0_phylolm <- phylolm(fit_phylolm$y ~ 1,
|
||||||
|
phy = phy,
|
||||||
|
model = fit_phylolm$model,
|
||||||
|
measurement_error = invalid_value(fit_phylolm$sigma2_error) # To let phylolm know if there's measurement error
|
||||||
|
)
|
||||||
|
lambda_ratio_stat <- -2 * (h0_phylolm$logLik - fit_phylolm$logLik)
|
||||||
|
|
||||||
|
|
||||||
|
# Computes the pvalue from the statistic
|
||||||
|
# df1 = K - 1
|
||||||
|
pvalue_phylolm <- 1 - pchisq(lambda_ratio_stat, df1)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return(data.frame(
|
||||||
|
tested_method = tested_method,
|
||||||
|
pvalue_anova = pvalue_anova,
|
||||||
|
pvalue_phylolm = pvalue_phylolm,
|
||||||
|
anova_df1 = anova_df1,
|
||||||
|
anova_df2 = anova_df2,
|
||||||
|
phylolm_df1 = df1,
|
||||||
|
phylolm_df2 = df2
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
simulate_matching_and_random <- function(
|
simulate_matching_and_random <- function(
|
||||||
id, base_values,
|
id, base_values,
|
||||||
|
|
@ -161,7 +245,7 @@ simulate_matching_and_random <- function(
|
||||||
test_method = test_method, measurement_error = TRUE
|
test_method = test_method, measurement_error = TRUE
|
||||||
)
|
)
|
||||||
matching_pvalues <- matching_pval_df[c(1, 2)]
|
matching_pvalues <- matching_pval_df[c(1, 2)]
|
||||||
|
|
||||||
matching_df2 <- matching_pval_df[c(3, 4)]
|
matching_df2 <- matching_pval_df[c(3, 4)]
|
||||||
|
|
||||||
random_groups_traits <- compute_trait_values(
|
random_groups_traits <- compute_trait_values(
|
||||||
|
|
@ -176,9 +260,9 @@ simulate_matching_and_random <- function(
|
||||||
groups = random_groups, tree, stoch_process = stoch_process,
|
groups = random_groups, tree, stoch_process = stoch_process,
|
||||||
test_method = test_method, measurement_error = TRUE
|
test_method = test_method, measurement_error = TRUE
|
||||||
)
|
)
|
||||||
random_groups_pvalues <- random_groups_pval_df2[c(1,2)]
|
random_groups_pvalues <- random_groups_pval_df2[c(1, 2)]
|
||||||
|
|
||||||
random_groups_df2 <- random_groups_pval_df2[c(3,4)]
|
random_groups_df2 <- random_groups_pval_df2[c(3, 4)]
|
||||||
# Concatenate pvalues
|
# Concatenate pvalues
|
||||||
pvalues <- c(unlist(matching_pvalues), unlist(random_groups_pvalues))
|
pvalues <- c(unlist(matching_pvalues), unlist(random_groups_pvalues))
|
||||||
|
|
||||||
|
|
@ -241,7 +325,6 @@ compare_methods <- function(
|
||||||
stop("Unknown method to test.")
|
stop("Unknown method to test.")
|
||||||
}
|
}
|
||||||
# Generating data for each method
|
# Generating data for each method
|
||||||
# TODO Séparer les deux fonctions de simulation et d'inférence
|
|
||||||
# TODO Utiliser les mêmes données pour les méthodes
|
# TODO Utiliser les mêmes données pour les méthodes
|
||||||
## To compute power
|
## To compute power
|
||||||
full_power_data <-
|
full_power_data <-
|
||||||
|
|
@ -316,54 +399,6 @@ plot_simulation_data <- function(data, parameters_string, threshold = 0.95) {
|
||||||
return(p)
|
return(p)
|
||||||
}
|
}
|
||||||
|
|
||||||
# # Vanilla
|
|
||||||
# vanilla_results <- simulate_data(N, base_values, risk_threshold, sigma2_phylo,
|
|
||||||
# sigma2_measure, stoch_process,
|
|
||||||
# test_method = "vanilla"
|
|
||||||
# )
|
|
||||||
# vanilla_data <- vanilla_results$data
|
|
||||||
# vanilla_parameters_string <- vanilla_results$parameters_string
|
|
||||||
# plot_simulation_data(vanilla_data, vanilla_parameters_string)
|
|
||||||
|
|
||||||
# vanilla_results_H0 <- simulate_data(N,
|
|
||||||
# base_values = c(1, 1), risk_threshold, sigma2_phylo,
|
|
||||||
# sigma2_measure, stoch_process,
|
|
||||||
# test_method = "vanilla",
|
|
||||||
# correct_hypothesis = "H0"
|
|
||||||
# )
|
|
||||||
# vanilla_data_H0 <- vanilla_results_H0$data
|
|
||||||
# vanilla_parameters_string_H0 <- vanilla_results_H0$parameters_string
|
|
||||||
# plot_simulation_data(vanilla_data_H0, vanilla_parameters_string_H0, threshold = 0.05)
|
|
||||||
|
|
||||||
# # Satterthwaite
|
|
||||||
|
|
||||||
# satterthwaite_results <- simulate_data(N, base_values, risk_threshold, sigma2_phylo,
|
|
||||||
# sigma2_measure = 1, stoch_process,
|
|
||||||
# test_method = "satterthwaite"
|
|
||||||
# )
|
|
||||||
# satterthwaite_data <- satterthwaite_results$data
|
|
||||||
# satterthwaite_parameters_string <- satterthwaite_results$parameters_string
|
|
||||||
# plot_simulation_data(satterthwaite_data, satterthwaite_parameters_string)
|
|
||||||
|
|
||||||
# satterthwaite_results_H0 <- simulate_data(N,
|
|
||||||
# base_values = c(1, 1), risk_threshold, sigma2_phylo,
|
|
||||||
# sigma2_measure = 1, stoch_process,
|
|
||||||
# test_method = "satterthwaite", correct_hypothesis = "H0"
|
|
||||||
# )
|
|
||||||
# satterthwaite_data_H0 <- satterthwaite_results_H0$data
|
|
||||||
# satterthwaite_parameters_string_H0 <- satterthwaite_results_H0$parameters_string
|
|
||||||
# plot_simulation_data(satterthwaite_data_H0, satterthwaite_parameters_string_H0, threshold = 0.05)
|
|
||||||
|
|
||||||
# # Likelihood ratio
|
|
||||||
|
|
||||||
# lrt_results <- simulate_data(N, base_values, risk_threshold, sigma2_phylo,
|
|
||||||
# sigma2_measure, stoch_process,
|
|
||||||
# test_method = "lrt"
|
|
||||||
# )
|
|
||||||
# lrt_data <- lrt_results$data
|
|
||||||
# lrt_parameters_string <- lrt_results$parameters_string
|
|
||||||
# plot_simulation_data(lrt_data, lrt_parameters_string)
|
|
||||||
|
|
||||||
plot_comparison <- function(data, sim_parameters) {
|
plot_comparison <- function(data, sim_parameters) {
|
||||||
# Retrieving simulation parameters
|
# Retrieving simulation parameters
|
||||||
risk_threshold <- sim_parameters$risk_threshold
|
risk_threshold <- sim_parameters$risk_threshold
|
||||||
|
|
@ -424,7 +459,6 @@ plot_comparison <- function(data, sim_parameters) {
|
||||||
# plot_comparison(comparison_data, sim_parameters = comparison$sim_parameters)
|
# plot_comparison(comparison_data, sim_parameters = comparison$sim_parameters)
|
||||||
|
|
||||||
|
|
||||||
# TODO Adapt to the current code
|
|
||||||
## Standardized parameters
|
## Standardized parameters
|
||||||
total_variance <- 1.0 # sigma2_phylo + sigma2_error, fixed [as tree_height = 1]
|
total_variance <- 1.0 # sigma2_phylo + sigma2_error, fixed [as tree_height = 1]
|
||||||
heri <- c(0.0, 0.25, 0.5, 1.0) # heritability her = sigma2_phylo / total_variance. 0 means only noise. 1 means only phylo.
|
heri <- c(0.0, 0.25, 0.5, 1.0) # heritability her = sigma2_phylo / total_variance. 0 means only noise. 1 means only phylo.
|
||||||
|
|
@ -442,5 +476,3 @@ for (her in heri) {
|
||||||
res_sim_plot
|
res_sim_plot
|
||||||
ggsave(paste0("img/simulation_BM_her_", her, ".png"), plot = res_sim_plot)
|
ggsave(paste0("img/simulation_BM_her_", her, ".png"), plot = res_sim_plot)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue