mirror of
https://github.com/Polarolouis/anova-phylogenetique-projet-msv.git
synced 2026-06-17 10:15:25 +02:00
💻 Ajout des résultats de simulations et ajout des méthodes REML
This commit is contained in:
parent
e9b53f2a60
commit
997c154ba3
7 changed files with 98 additions and 90 deletions
76
R/utils.R
76
R/utils.R
|
|
@ -300,19 +300,14 @@ compute_vanilla_pvalue <- function(fit_phylolm, return_df = FALSE) {
|
||||||
#' @param REML Use REML for computation
|
#' @param REML Use REML for computation
|
||||||
#'
|
#'
|
||||||
#' @return pvalue
|
#' @return pvalue
|
||||||
compute_satterthwaite_pvalue <- function(fit_phylolm, tree, REML = FALSE, return_df = FALSE) {
|
compute_satterthwaite_pvalue <- function(fit_phylolm, tree, return_df = FALSE) {
|
||||||
# Extract parameters
|
# Extract parameters
|
||||||
nb_species <- nrow(fit_phylolm$X)
|
nb_species <- nrow(fit_phylolm$X)
|
||||||
K <- length(unique(fit_phylolm$X[, 2]))
|
K <- length(unique(fit_phylolm$X[, 2]))
|
||||||
|
|
||||||
# Compute degrees of freedom
|
# Compute degrees of freedom
|
||||||
df1 <- K - 1
|
df1 <- K - 1
|
||||||
# Satterthwaite approximation
|
|
||||||
# df2 <- ddf_satterthwaite_sum(
|
|
||||||
# fit_phylolm = fit_phylolm,
|
|
||||||
# phylo = tree,
|
|
||||||
# REML = REML
|
|
||||||
# )$ddf
|
|
||||||
df2 <- phylolimma:::ddf_satterthwaite_BM_error(fit_phylolm = fit_phylolm,
|
df2 <- phylolimma:::ddf_satterthwaite_BM_error(fit_phylolm = fit_phylolm,
|
||||||
phylo = tree)$ddf
|
phylo = tree)$ddf
|
||||||
|
|
||||||
|
|
@ -374,11 +369,14 @@ pvalues_from_fits <- function(
|
||||||
fit_phylolm,
|
fit_phylolm,
|
||||||
fit_phylolm_reml,
|
fit_phylolm_reml,
|
||||||
tree,
|
tree,
|
||||||
tested_methods = c("anova", "vanilla", "satterthwaite", "lrt"),
|
tested_methods = c("ANOVA", "ANOVA Phylo", "ANOVA Phylo REML",
|
||||||
|
"ANOVA Phylo Satterthwaite", "ANOVA Phylo Satterthwaite REML", "LRT"),
|
||||||
REML = TRUE) {
|
REML = TRUE) {
|
||||||
# For sanity test
|
# For sanity test
|
||||||
rlang::arg_match(tested_methods,
|
rlang::arg_match(tested_methods,
|
||||||
values = c("anova", "vanilla", "satterthwaite", "lrt"),
|
values = c("ANOVA", "ANOVA Phylo", "ANOVA Phylo REML",
|
||||||
|
"ANOVA Phylo Satterthwaite", "ANOVA Phylo Satterthwaite REML",
|
||||||
|
"LRT"),
|
||||||
multiple = TRUE
|
multiple = TRUE
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -390,19 +388,13 @@ pvalues_from_fits <- function(
|
||||||
res_df <- data.frame(matrix(ncol = 4, nrow = 0))
|
res_df <- data.frame(matrix(ncol = 4, nrow = 0))
|
||||||
colnames(res_df) <- c("tested_method", "pvalue", "df1", "df2")
|
colnames(res_df) <- c("tested_method", "pvalue", "df1", "df2")
|
||||||
|
|
||||||
if (REML) {
|
|
||||||
fitphylo <- fit_phylolm_reml
|
|
||||||
} else {
|
|
||||||
fitphylo <- fit_phylolm
|
|
||||||
}
|
|
||||||
|
|
||||||
# Iterates over the methods to test
|
# Iterates over the methods to test
|
||||||
for (method in tested_methods) {
|
for (method in tested_methods) {
|
||||||
# The default value for the df2
|
# The default value for the df2
|
||||||
df1 <- K - 1
|
df1 <- K - 1
|
||||||
|
|
||||||
switch(method,
|
switch(method,
|
||||||
"anova" = {
|
"ANOVA" = {
|
||||||
anova_F_stat <- summary(fit_anova)$fstatistic[1]
|
anova_F_stat <- summary(fit_anova)$fstatistic[1]
|
||||||
df1 <- summary(fit_anova)$fstatistic[2]
|
df1 <- summary(fit_anova)$fstatistic[2]
|
||||||
df2 <- summary(fit_anova)$fstatistic[3]
|
df2 <- summary(fit_anova)$fstatistic[3]
|
||||||
|
|
@ -410,23 +402,31 @@ pvalues_from_fits <- function(
|
||||||
df1 = df1, df2 = df2
|
df1 = df1, df2 = df2
|
||||||
)
|
)
|
||||||
},
|
},
|
||||||
"vanilla" = {
|
"ANOVA Phylo" = {
|
||||||
df2 <- nb_species - K
|
df2 <- nb_species - K
|
||||||
pvalue <- compute_vanilla_pvalue(fit_phylolm = fitphylo)
|
pvalue <- compute_vanilla_pvalue(fit_phylolm = fit_phylolm)
|
||||||
},
|
},
|
||||||
"satterthwaite" = {
|
"ANOVA Phylo REML" = {
|
||||||
df2 <- ddf_satterthwaite_sum(
|
df2 <- nb_species - K
|
||||||
fit_phylolm = fitphylo,
|
pvalue <- compute_vanilla_pvalue(fit_phylolm = fit_phylolm_reml)
|
||||||
phylo = tree,
|
},
|
||||||
REML = REML
|
"ANOVA Phylo Satterthwaite" = {
|
||||||
)$ddf
|
df2 <- phylolimma:::ddf_satterthwaite_BM_error(
|
||||||
|
fit_phylolm = fit_phylolm,
|
||||||
|
phylo = tree)$ddf
|
||||||
pvalue <- compute_satterthwaite_pvalue(
|
pvalue <- compute_satterthwaite_pvalue(
|
||||||
fit_phylolm = fitphylo,
|
fit_phylolm = fit_phylolm,
|
||||||
tree = tree,
|
tree = tree)
|
||||||
REML = REML
|
|
||||||
)
|
|
||||||
},
|
},
|
||||||
"lrt" = {
|
"ANOVA Phylo Satterthwaite REML" = {
|
||||||
|
df2 <- phylolimma:::ddf_satterthwaite_BM_error(
|
||||||
|
fit_phylolm = fit_phylolm_reml,
|
||||||
|
phylo = tree)$ddf
|
||||||
|
pvalue <- compute_satterthwaite_pvalue(
|
||||||
|
fit_phylolm = fit_phylolm_reml,
|
||||||
|
tree = tree)
|
||||||
|
},
|
||||||
|
"LRT" = {
|
||||||
df2 <- NA
|
df2 <- NA
|
||||||
pvalue <- compute_lrt_pvalue(
|
pvalue <- compute_lrt_pvalue(
|
||||||
fit_phylolm = fit_phylolm,
|
fit_phylolm = fit_phylolm,
|
||||||
|
|
@ -592,14 +592,16 @@ plot_method_comparison <- function(df_plot, title = "") {
|
||||||
scale_y_continuous(limits = c(0, 1)) +
|
scale_y_continuous(limits = c(0, 1)) +
|
||||||
ylab("Erreur type I") +
|
ylab("Erreur type I") +
|
||||||
xlab("Type de groupe") +
|
xlab("Type de groupe") +
|
||||||
labs(fill = "Type de groupe") +
|
# labs(fill = "Type de groupe") +
|
||||||
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
|
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
|
||||||
facet_wrap(vars(tested_method), nrow = 4) +
|
facet_wrap(vars(tested_method), ncol = 1) +
|
||||||
geom_text(aes(label = round(errortypeI, digits = 2)),
|
geom_text(aes(label = round(errortypeI, digits = 2)),
|
||||||
vjust = -0.5, position = position_dodge(width = 0.9)
|
vjust = -0.5, position = position_dodge(width = 0.9)
|
||||||
) +
|
) +
|
||||||
geom_hline(yintercept = 0.05) +
|
geom_hline(yintercept = 0.05) +
|
||||||
ggtitle("Erreur Type I")
|
guides(fill="none") +
|
||||||
|
ggtitle("Erreur Type I") +
|
||||||
|
theme_minimal()
|
||||||
|
|
||||||
power <- ggplot(df_plot) +
|
power <- ggplot(df_plot) +
|
||||||
aes(x = group_type, y = power, fill = group_type) +
|
aes(x = group_type, y = power, fill = group_type) +
|
||||||
|
|
@ -607,14 +609,16 @@ plot_method_comparison <- function(df_plot, title = "") {
|
||||||
scale_y_continuous(limits = c(0, 1)) +
|
scale_y_continuous(limits = c(0, 1)) +
|
||||||
ylab("Puissance") +
|
ylab("Puissance") +
|
||||||
xlab("Type de groupe") +
|
xlab("Type de groupe") +
|
||||||
labs(fill = "Type de groupe") +
|
# labs(fill = "Type de groupe") +
|
||||||
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
|
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
|
||||||
facet_wrap(vars(tested_method), nrow = 4) +
|
facet_wrap(vars(tested_method), ncol = 1) +
|
||||||
geom_text(aes(label = round(power, digits = 2)),
|
geom_text(aes(label = round(power, digits = 2)),
|
||||||
vjust = -0.5, position = position_dodge(width = 0.9)
|
vjust = -0.5, position = position_dodge(width = 0.9)
|
||||||
) +
|
) +
|
||||||
ggtitle("Puissance")
|
guides(fill="none") +
|
||||||
|
ggtitle("Puissance") +
|
||||||
|
theme_minimal()
|
||||||
|
|
||||||
(error + power + plot_layout(guides = "collect", axes = "collect", axis_titles = "collect")) +
|
(error + power + plot_layout(guides = "collect", axes = "collect", axis_titles = "collect")) +
|
||||||
plot_annotation(title = title)
|
plot_annotation(title = title, tag_levels = 'A')
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -40,7 +40,8 @@ nous nous attendons à ce que l'ANOVA phylogénétique parvienne à mieux prendr
|
||||||
compte l'information apportée par la phylogénie et à démêler son effet.\\
|
compte l'information apportée par la phylogénie et à démêler son effet.\\
|
||||||
|
|
||||||
<<'modules-simulations', include = FALSE, eval=TRUE>>=
|
<<'modules-simulations', include = FALSE, eval=TRUE>>=
|
||||||
necessary_simu <- c("ape", "remotes", "phylolm", "phylolimma", "phytools", "here")
|
necessary_simu <- c("ape", "remotes", "phylolm", "phylolimma", "phytools",
|
||||||
|
"latex2exp", "here")
|
||||||
|
|
||||||
if (any(!(necessary_simu %in% installed.packages()))){
|
if (any(!(necessary_simu %in% installed.packages()))){
|
||||||
install.packages(necessary_simu)
|
install.packages(necessary_simu)
|
||||||
|
|
@ -55,6 +56,7 @@ library("here")
|
||||||
library("tidyverse")
|
library("tidyverse")
|
||||||
library("ggplot2")
|
library("ggplot2")
|
||||||
library("patchwork")
|
library("patchwork")
|
||||||
|
library("latex2exp")
|
||||||
source(here("R","utils.R"))
|
source(here("R","utils.R"))
|
||||||
@
|
@
|
||||||
|
|
||||||
|
|
@ -106,26 +108,29 @@ du genre \emph{Mus} avec les rats et les autres espèces dans un autre groupe
|
||||||
Et pour le groupe ne respectant pas la phylogénie, nous avons sélectionnés les
|
Et pour le groupe ne respectant pas la phylogénie, nous avons sélectionnés les
|
||||||
espèces en respectant les proportions des groupes définis
|
espèces en respectant les proportions des groupes définis
|
||||||
avant afin de rendre les résultats comparables (voir la
|
avant afin de rendre les résultats comparables (voir la
|
||||||
figure~\ref{fig:simu-groupes-alea-prop}).
|
figure~\ref{fig:simu-groupes-prop}).
|
||||||
Enfin pour que notre analyse soit reproductible nous fixons la graine à
|
Enfin pour que notre analyse soit reproductible nous fixons la graine à
|
||||||
\Sexpr{seed}.
|
\Sexpr{seed}.
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
\begin{figure}[H]
|
\begin{subfigure}[H]{0.45\textwidth}
|
||||||
\centering
|
\centering
|
||||||
<<'plot-groupes-mus'>>=
|
<<'plot-groupes-mus'>>=
|
||||||
plot_group_on_tree(tree, group = group_mus_rat_vs_other)
|
plot_group_on_tree(tree, group = group_mus_rat_vs_other)
|
||||||
@
|
@
|
||||||
\caption{Groupes \emph{Mus} et rats contre les autres}
|
\caption{Groupes \emph{Mus} et rats contre les autres}
|
||||||
\label{fig:simu-groupes-mus}
|
\label{fig:simu-groupes-mus}
|
||||||
\end{figure}
|
\end{subfigure}
|
||||||
\begin{figure}[H]
|
\begin{subfigure}[H]{0.45\textwidth}
|
||||||
\centering
|
\centering
|
||||||
<<'plot-groupes-random'>>=
|
<<'plot-groupes-random'>>=
|
||||||
plot_group_on_tree(tree, group = random_groups)
|
plot_group_on_tree(tree, group = random_groups)
|
||||||
@
|
@
|
||||||
\caption{Groupes aléatoires respectant les proportions}
|
\caption{Groupes respectant les proportions}
|
||||||
\label{fig:simu-groupes-alea-prop}
|
\label{fig:simu-groupes-prop}
|
||||||
|
\end{subfigure}
|
||||||
|
\caption{Arbre et groupes pour les simulations}
|
||||||
|
\label{fig:arbres-groupes}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
<<'param-simulation'>>=
|
<<'param-simulation'>>=
|
||||||
|
|
@ -142,52 +147,51 @@ heri <- c(0.3, 0.5, 0.7, 0.9) # heritability her = sigma2_phylo / total_variance
|
||||||
snr <- 1 # signal to noise ratio snr = size_effect / total_variance
|
snr <- 1 # signal to noise ratio snr = size_effect / total_variance
|
||||||
@
|
@
|
||||||
|
|
||||||
Nous re-paramétrisons le modèle, $variance\text{ }totale = \sigma^2_{phylo} +
|
Nous re-paramétrisons le modèle, la variance totale $v_{tot}$ suit la relation
|
||||||
|
$v_{tot} = \sigma^2_{phylo} +
|
||||||
\sigma^2_{measure} = \Sexpr{total_variance} $.
|
\sigma^2_{measure} = \Sexpr{total_variance} $.
|
||||||
Nous allons faire prendre à $h$, défini comme l'héritabilité, les valeurs $h
|
Nous allons faire prendre à $h$, défini comme l'héritabilité, les valeurs $h
|
||||||
\in (\Sexpr{heri})$. L'héritabilité est liée à $\sigma^2_{phylo}$ et $\sigma^2_{phylo} = h\times
|
\in (\Sexpr{heri})$. L'héritabilité est liée à $\sigma^2_{phylo}$ et $\sigma^2_{phylo} = h\times
|
||||||
variance\text{ }totale$. Et alors $\sigma^2_{measure} = (1-h) \times
|
v_{tot}$. Et alors $\sigma^2_{measure} = (1-h) \times
|
||||||
variance\text{ }totale$.
|
v_{tot}$.
|
||||||
|
|
||||||
Pour chaque valeur d'héritabilité, nous allons générer $\Sexpr{N}$ jeux de données
|
Pour chaque valeur d'héritabilité, nous allons générer $\Sexpr{N}$ jeux de données
|
||||||
différents sur lesquels les méthodes sont utilisées.
|
différents sur lesquels les méthodes sont utilisées.
|
||||||
|
|
||||||
<<'simus-results', cache = TRUE, warning = FALSE, fig.cap = "Simulations">>=
|
<<'simus-results', warning = FALSE, fig.pos = "H", fig.cap = "Simulations pour différente valeurs d'héritabilité", fig.subcap = paste0("$h = ", heri, "$"), fig.ncol = 2, out.width='.49\\linewidth'>>=
|
||||||
for (her in heri) {
|
|
||||||
|
plot_list <- lapply(seq_len(length(heri)), function(idx)
|
||||||
|
{
|
||||||
|
her <- heri[idx]
|
||||||
filename <- here("data", "simus",
|
filename <- here("data", "simus",
|
||||||
paste0("real_her_", her, "_seed_", seed, ".Rds"))
|
paste0("real_her_", her, "_seed_", seed, ".Rds"))
|
||||||
# if (!file.exists(filename)) {
|
if (!file.exists(filename)) {
|
||||||
sim <- N_simulation_typeI_power(N,
|
sim <- N_simulation_typeI_power(N,
|
||||||
groups_list = list(ratmus_vs_other = group_mus_rat_vs_other,
|
groups_list = list(RatMus = group_mus_rat_vs_other,
|
||||||
random = random_groups),
|
Proportions = random_groups),
|
||||||
tree = tree,
|
tree = tree,
|
||||||
base_values = c(0, snr * total_variance),
|
base_values = c(0, snr * total_variance),
|
||||||
sigma2_phylo = her * total_variance,
|
sigma2_phylo = her * total_variance,
|
||||||
sigma2_measure = (1 - her) * total_variance#,
|
sigma2_measure = (1 - her) * total_variance#,
|
||||||
# REML = TRUE
|
# REML = TRUE
|
||||||
)
|
)
|
||||||
df_sim_plot <- compute_power_typeI(df = sim)
|
saveRDS(sim, filename)
|
||||||
# saveRDS(df_sim_plot, filename)
|
}
|
||||||
# } else {
|
|
||||||
# load(filename)
|
sim <- readRDS(filename)
|
||||||
# }
|
|
||||||
res_sim_plot <- plot_method_comparison(df_sim_plot, title = paste("Rat&Mus héritabilité ", her))
|
df_sim_plot <- compute_power_typeI(df = sim)
|
||||||
knitr::knit_print(res_sim_plot)
|
res_sim_plot <- plot_method_comparison(df_sim_plot)
|
||||||
# saveRDS(sim, filename)
|
res_sim_plot
|
||||||
|
})
|
||||||
|
for(plot in plot_list) {
|
||||||
|
print(plot)
|
||||||
}
|
}
|
||||||
@
|
@
|
||||||
|
|
||||||
% <<'simus-plot'>>=
|
Sur toutes les sous-figures de la figure~\ref{fig:simus-results}, les étiquettes
|
||||||
% for (her in heri) {
|
A présentent les erreurs de type I commises par les méthodes et les étiquettes B
|
||||||
% filename <- here("data", "simus",
|
présentent les puissances des mêmes méthodes.
|
||||||
% paste0("real_her_", her, "_seed_", seed, ".Rds"))
|
|
||||||
% load(filename)
|
|
||||||
% df_sim_plot <- compute_power_typeI(df = sim)
|
|
||||||
|
|
||||||
% res_sim_plot <- plot_method_comparison(df_sim_plot, title = paste("Rat&Mus héritabilité ", her))
|
TODO Ajouter les commentaires sur les simulations
|
||||||
% print(res_sim_plot)
|
|
||||||
% }
|
|
||||||
% @
|
|
||||||
|
|
||||||
\subsubsection*{\emph{REML} contre \emph{Maximum Likelihood}}
|
|
||||||
|
|
|
||||||
BIN
data/simus/real_her_0.3_seed_1234.Rds
Normal file
BIN
data/simus/real_her_0.3_seed_1234.Rds
Normal file
Binary file not shown.
BIN
data/simus/real_her_0.5_seed_1234.Rds
Normal file
BIN
data/simus/real_her_0.5_seed_1234.Rds
Normal file
Binary file not shown.
BIN
data/simus/real_her_0.7_seed_1234.Rds
Normal file
BIN
data/simus/real_her_0.7_seed_1234.Rds
Normal file
Binary file not shown.
BIN
data/simus/real_her_0.9_seed_1234.Rds
Normal file
BIN
data/simus/real_her_0.9_seed_1234.Rds
Normal file
Binary file not shown.
BIN
rapport.pdf
BIN
rapport.pdf
Binary file not shown.
Loading…
Add table
Reference in a new issue