diff --git a/R/utils.R b/R/utils.R index ad6d6a8..8c14f24 100644 --- a/R/utils.R +++ b/R/utils.R @@ -300,19 +300,14 @@ compute_vanilla_pvalue <- function(fit_phylolm, return_df = FALSE) { #' @param REML Use REML for computation #' #' @return pvalue -compute_satterthwaite_pvalue <- function(fit_phylolm, tree, REML = FALSE, return_df = FALSE) { +compute_satterthwaite_pvalue <- function(fit_phylolm, tree, return_df = FALSE) { # Extract parameters nb_species <- nrow(fit_phylolm$X) K <- length(unique(fit_phylolm$X[, 2])) #  Compute degrees of freedom df1 <- K - 1 - # Satterthwaite approximation - # df2 <- ddf_satterthwaite_sum( - # fit_phylolm = fit_phylolm, - # phylo = tree, - # REML = REML - # )$ddf + df2 <- phylolimma:::ddf_satterthwaite_BM_error(fit_phylolm = fit_phylolm, phylo = tree)$ddf @@ -374,11 +369,14 @@ pvalues_from_fits <- function( fit_phylolm, fit_phylolm_reml, tree, - tested_methods = c("anova", "vanilla", "satterthwaite", "lrt"), + tested_methods = c("ANOVA", "ANOVA Phylo", "ANOVA Phylo REML", + "ANOVA Phylo Satterthwaite", "ANOVA Phylo Satterthwaite REML", "LRT"), REML = TRUE) { #  For sanity test rlang::arg_match(tested_methods, - values = c("anova", "vanilla", "satterthwaite", "lrt"), + values = c("ANOVA", "ANOVA Phylo", "ANOVA Phylo REML", + "ANOVA Phylo Satterthwaite", "ANOVA Phylo Satterthwaite REML", + "LRT"), multiple = TRUE ) @@ -390,19 +388,13 @@ pvalues_from_fits <- function( res_df <- data.frame(matrix(ncol = 4, nrow = 0)) colnames(res_df) <- c("tested_method", "pvalue", "df1", "df2") - if (REML) { - fitphylo <- fit_phylolm_reml - } else { - fitphylo <- fit_phylolm - } - # Iterates over the methods to test for (method in tested_methods) { #  The default value for the df2 df1 <- K - 1 switch(method, - "anova" = { + "ANOVA" = { anova_F_stat <- summary(fit_anova)$fstatistic[1] df1 <- summary(fit_anova)$fstatistic[2] df2 <- summary(fit_anova)$fstatistic[3] @@ -410,23 +402,31 @@ pvalues_from_fits <- function( df1 = df1, df2 = df2 ) }, - "vanilla" = { + "ANOVA Phylo" = { df2 <- nb_species - K - pvalue <- compute_vanilla_pvalue(fit_phylolm = fitphylo) + pvalue <- compute_vanilla_pvalue(fit_phylolm = fit_phylolm) }, - "satterthwaite" = { - df2 <- ddf_satterthwaite_sum( - fit_phylolm = fitphylo, - phylo = tree, - REML = REML - )$ddf + "ANOVA Phylo REML" = { + df2 <- nb_species - K + pvalue <- compute_vanilla_pvalue(fit_phylolm = fit_phylolm_reml) + }, + "ANOVA Phylo Satterthwaite" = { + df2 <- phylolimma:::ddf_satterthwaite_BM_error( + fit_phylolm = fit_phylolm, + phylo = tree)$ddf pvalue <- compute_satterthwaite_pvalue( - fit_phylolm = fitphylo, - tree = tree, - REML = REML - ) + fit_phylolm = fit_phylolm, + tree = tree) }, - "lrt" = { + "ANOVA Phylo Satterthwaite REML" = { + df2 <- phylolimma:::ddf_satterthwaite_BM_error( + fit_phylolm = fit_phylolm_reml, + phylo = tree)$ddf + pvalue <- compute_satterthwaite_pvalue( + fit_phylolm = fit_phylolm_reml, + tree = tree) + }, + "LRT" = { df2 <- NA pvalue <- compute_lrt_pvalue( fit_phylolm = fit_phylolm, @@ -592,14 +592,16 @@ plot_method_comparison <- function(df_plot, title = "") { scale_y_continuous(limits = c(0, 1)) + ylab("Erreur type I") + xlab("Type de groupe") + - labs(fill = "Type de groupe") + + # labs(fill = "Type de groupe") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + - facet_wrap(vars(tested_method), nrow = 4) + + facet_wrap(vars(tested_method), ncol = 1) + geom_text(aes(label = round(errortypeI, digits = 2)), vjust = -0.5, position = position_dodge(width = 0.9) ) + geom_hline(yintercept = 0.05) + - ggtitle("Erreur Type I") + guides(fill="none") + + ggtitle("Erreur Type I") + + theme_minimal() power <- ggplot(df_plot) + aes(x = group_type, y = power, fill = group_type) + @@ -607,14 +609,16 @@ plot_method_comparison <- function(df_plot, title = "") { scale_y_continuous(limits = c(0, 1)) + ylab("Puissance") + xlab("Type de groupe") + - labs(fill = "Type de groupe") + + # labs(fill = "Type de groupe") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + - facet_wrap(vars(tested_method), nrow = 4) + + facet_wrap(vars(tested_method), ncol = 1) + geom_text(aes(label = round(power, digits = 2)), vjust = -0.5, position = position_dodge(width = 0.9) ) + - ggtitle("Puissance") + guides(fill="none") + + ggtitle("Puissance") + + theme_minimal() (error + power + plot_layout(guides = "collect", axes = "collect", axis_titles = "collect")) + - plot_annotation(title = title) + plot_annotation(title = title, tag_levels = 'A') } diff --git a/Rnw/simulations-methodes.Rnw b/Rnw/simulations-methodes.Rnw index 404b5c1..2992706 100644 --- a/Rnw/simulations-methodes.Rnw +++ b/Rnw/simulations-methodes.Rnw @@ -40,7 +40,8 @@ nous nous attendons à ce que l'ANOVA phylogénétique parvienne à mieux prendr compte l'information apportée par la phylogénie et à démêler son effet.\\ <<'modules-simulations', include = FALSE, eval=TRUE>>= -necessary_simu <- c("ape", "remotes", "phylolm", "phylolimma", "phytools", "here") +necessary_simu <- c("ape", "remotes", "phylolm", "phylolimma", "phytools", + "latex2exp", "here") if (any(!(necessary_simu %in% installed.packages()))){ install.packages(necessary_simu) @@ -55,6 +56,7 @@ library("here") library("tidyverse") library("ggplot2") library("patchwork") +library("latex2exp") source(here("R","utils.R")) @ @@ -106,26 +108,29 @@ du genre \emph{Mus} avec les rats et les autres espèces dans un autre groupe Et pour le groupe ne respectant pas la phylogénie, nous avons sélectionnés les espèces en respectant les proportions des groupes définis avant afin de rendre les résultats comparables (voir la -figure~\ref{fig:simu-groupes-alea-prop}). +figure~\ref{fig:simu-groupes-prop}). Enfin pour que notre analyse soit reproductible nous fixons la graine à \Sexpr{seed}. - -\begin{figure}[H] - \centering - <<'plot-groupes-mus'>>= - plot_group_on_tree(tree, group = group_mus_rat_vs_other) - @ - \caption{Groupes \emph{Mus} et rats contre les autres} - \label{fig:simu-groupes-mus} -\end{figure} -\begin{figure}[H] - \centering - <<'plot-groupes-random'>>= - plot_group_on_tree(tree, group = random_groups) - @ - \caption{Groupes aléatoires respectant les proportions} - \label{fig:simu-groupes-alea-prop} +\begin{figure} + \begin{subfigure}[H]{0.45\textwidth} + \centering + <<'plot-groupes-mus'>>= + plot_group_on_tree(tree, group = group_mus_rat_vs_other) + @ + \caption{Groupes \emph{Mus} et rats contre les autres} + \label{fig:simu-groupes-mus} + \end{subfigure} + \begin{subfigure}[H]{0.45\textwidth} + \centering + <<'plot-groupes-random'>>= + plot_group_on_tree(tree, group = random_groups) + @ + \caption{Groupes respectant les proportions} + \label{fig:simu-groupes-prop} + \end{subfigure} + \caption{Arbre et groupes pour les simulations} + \label{fig:arbres-groupes} \end{figure} <<'param-simulation'>>= @@ -142,52 +147,51 @@ heri <- c(0.3, 0.5, 0.7, 0.9) # heritability her = sigma2_phylo / total_variance snr <- 1 # signal to noise ratio snr = size_effect / total_variance @ -Nous re-paramétrisons le modèle, $variance\text{ }totale = \sigma^2_{phylo} + +Nous re-paramétrisons le modèle, la variance totale $v_{tot}$ suit la relation +$v_{tot} = \sigma^2_{phylo} + \sigma^2_{measure} = \Sexpr{total_variance} $. Nous allons faire prendre à $h$, défini comme l'héritabilité, les valeurs $h \in (\Sexpr{heri})$. L'héritabilité est liée à $\sigma^2_{phylo}$ et $\sigma^2_{phylo} = h\times -variance\text{ }totale$. Et alors $\sigma^2_{measure} = (1-h) \times -variance\text{ }totale$. +v_{tot}$. Et alors $\sigma^2_{measure} = (1-h) \times +v_{tot}$. Pour chaque valeur d'héritabilité, nous allons générer $\Sexpr{N}$ jeux de données différents sur lesquels les méthodes sont utilisées. -<<'simus-results', cache = TRUE, warning = FALSE, fig.cap = "Simulations">>= -for (her in heri) { +<<'simus-results', warning = FALSE, fig.pos = "H", fig.cap = "Simulations pour différente valeurs d'héritabilité", fig.subcap = paste0("$h = ", heri, "$"), fig.ncol = 2, out.width='.49\\linewidth'>>= + +plot_list <- lapply(seq_len(length(heri)), function(idx) + { + her <- heri[idx] filename <- here("data", "simus", - paste0("real_her_", her, "_seed_", seed, ".Rds")) - # if (!file.exists(filename)) { - sim <- N_simulation_typeI_power(N, - groups_list = list(ratmus_vs_other = group_mus_rat_vs_other, - random = random_groups), - tree = tree, - base_values = c(0, snr * total_variance), - sigma2_phylo = her * total_variance, - sigma2_measure = (1 - her) * total_variance#, - # REML = TRUE - ) - df_sim_plot <- compute_power_typeI(df = sim) - # saveRDS(df_sim_plot, filename) - # } else { - # load(filename) - # } - res_sim_plot <- plot_method_comparison(df_sim_plot, title = paste("Rat&Mus héritabilité ", her)) - knitr::knit_print(res_sim_plot) - # saveRDS(sim, filename) - + paste0("real_her_", her, "_seed_", seed, ".Rds")) + if (!file.exists(filename)) { + sim <- N_simulation_typeI_power(N, + groups_list = list(RatMus = group_mus_rat_vs_other, + Proportions = random_groups), + tree = tree, + base_values = c(0, snr * total_variance), + sigma2_phylo = her * total_variance, + sigma2_measure = (1 - her) * total_variance#, + # REML = TRUE + ) + saveRDS(sim, filename) + } + + sim <- readRDS(filename) + + df_sim_plot <- compute_power_typeI(df = sim) + res_sim_plot <- plot_method_comparison(df_sim_plot) + res_sim_plot +}) +for(plot in plot_list) { + print(plot) } @ -% <<'simus-plot'>>= -% for (her in heri) { -% filename <- here("data", "simus", -% paste0("real_her_", her, "_seed_", seed, ".Rds")) -% load(filename) -% df_sim_plot <- compute_power_typeI(df = sim) +Sur toutes les sous-figures de la figure~\ref{fig:simus-results}, les étiquettes +A présentent les erreurs de type I commises par les méthodes et les étiquettes B +présentent les puissances des mêmes méthodes. -% res_sim_plot <- plot_method_comparison(df_sim_plot, title = paste("Rat&Mus héritabilité ", her)) -% print(res_sim_plot) -% } -% @ +TODO Ajouter les commentaires sur les simulations -\subsubsection*{\emph{REML} contre \emph{Maximum Likelihood}} diff --git a/data/simus/real_her_0.3_seed_1234.Rds b/data/simus/real_her_0.3_seed_1234.Rds new file mode 100644 index 0000000..7ebd303 Binary files /dev/null and b/data/simus/real_her_0.3_seed_1234.Rds differ diff --git a/data/simus/real_her_0.5_seed_1234.Rds b/data/simus/real_her_0.5_seed_1234.Rds new file mode 100644 index 0000000..0c9fa7b Binary files /dev/null and b/data/simus/real_her_0.5_seed_1234.Rds differ diff --git a/data/simus/real_her_0.7_seed_1234.Rds b/data/simus/real_her_0.7_seed_1234.Rds new file mode 100644 index 0000000..f96758a Binary files /dev/null and b/data/simus/real_her_0.7_seed_1234.Rds differ diff --git a/data/simus/real_her_0.9_seed_1234.Rds b/data/simus/real_her_0.9_seed_1234.Rds new file mode 100644 index 0000000..410e31f Binary files /dev/null and b/data/simus/real_her_0.9_seed_1234.Rds differ diff --git a/rapport.pdf b/rapport.pdf index 1590eb9..4b1c6f3 100644 Binary files a/rapport.pdf and b/rapport.pdf differ