mirror of
https://github.com/Polarolouis/anova-phylogenetique-projet-msv.git
synced 2026-06-17 10:15:25 +02:00
Modifications from last meeting
This commit is contained in:
parent
0e427396b5
commit
971c8a49cb
9 changed files with 82 additions and 16 deletions
|
|
@ -64,6 +64,16 @@ if (!file.exists(here("data",pvalues_data))){
|
|||
|
||||
pvalue_vec_vanilla_adj <- p.adjust(pvalue_vec_vanilla, method = "BH")
|
||||
|
||||
pvalue_vec_vanilla.REML <- sapply(seq(1, nrow(data.trans)), function(row_id) {
|
||||
trait <- data.trans[row_id, ]
|
||||
fit_phylo <- phylolm(trait ~ design_data$condition, phy = cdata@tree, REML = TRUE, measurement_error = TRUE)
|
||||
compute_vanilla_pvalue(fit_phylo)
|
||||
})
|
||||
|
||||
pvalue_vec_vanilla.REML <- setNames(pvalue_vec_vanilla.REML, rownames(data.trans))
|
||||
|
||||
pvalue_vec_vanilla_adj.REML <- p.adjust(pvalue_vec_vanilla.REML, method = "BH")
|
||||
|
||||
pvalue_vec_satterthwaite <- sapply(seq(1, nrow(data.trans)), function(row_id) {
|
||||
trait <- data.trans[row_id, ]
|
||||
fit_phylo <- phylolm(trait ~ design_data$condition, phy = cdata@tree, measurement_error = TRUE)
|
||||
|
|
@ -87,7 +97,7 @@ if (!file.exists(here("data",pvalues_data))){
|
|||
# REML
|
||||
pvalue_vec_satterthwaite.REML <- sapply(seq(1, nrow(data.trans)), function(row_id) {
|
||||
trait <- data.trans[row_id, ]
|
||||
fit_phylo <- phylolm(trait ~ design_data$condition, phy = cdata@tree, measurement_error = TRUE)
|
||||
fit_phylo <- phylolm(trait ~ design_data$condition, phy = cdata@tree, REML = TRUE, measurement_error = TRUE)
|
||||
compute_satterthwaite_pvalue(fit_phylo, tree = cdata@tree, REML = TRUE)
|
||||
})
|
||||
|
||||
|
|
@ -95,16 +105,18 @@ if (!file.exists(here("data",pvalues_data))){
|
|||
|
||||
pvalue_vec_satterthwaite_adj.REML <- p.adjust(pvalue_vec_satterthwaite.REML, method = "BH")
|
||||
|
||||
# TODO Nettoyer le dataframe
|
||||
|
||||
## Préparation du dataframe
|
||||
pvalues_dataframe <- data.frame(
|
||||
gene = rep(rownames(data.trans), 8),
|
||||
pvalue = c(pvalue_vec_vanilla, pvalue_vec_vanilla_adj,
|
||||
pvalue_vec_satterthwaite, pvalue_vec_satterthwaite_adj,
|
||||
pvalue_vec_lrt, pvalue_vec_lrt_adj, pvalue_vec_satterthwaite.REML,
|
||||
gene = rep(rownames(data.trans), 5),
|
||||
pvalue = c(pvalue_vec_vanilla_adj,
|
||||
pvalue_vec_vanilla_adj.REML,
|
||||
pvalue_vec_satterthwaite_adj,
|
||||
pvalue_vec_lrt_adj,
|
||||
pvalue_vec_satterthwaite_adj.REML),
|
||||
test_method = rep(c("Vanilla", "VanillaAdj", "Satterthwaite",
|
||||
"SatterthwaiteAdj", "LRT", "LRTAdj", "SatterthwaiteREML",
|
||||
test_method = rep(c( "VanillaAdj", "VanillaAdjREML",
|
||||
"SatterthwaiteAdj", "LRTAdj",
|
||||
"SatterthwaiteAdjREML"), each = nrow(data.trans))
|
||||
)
|
||||
pvalues_dataframe$test_method <- as.factor(pvalues_dataframe$test_method)
|
||||
|
|
@ -113,12 +125,15 @@ if (!file.exists(here("data",pvalues_data))){
|
|||
} else {
|
||||
load(here("data", pvalues_data))
|
||||
}
|
||||
|
||||
pvalues_dataframe$Adj <- grepl(pattern = "*Adj*", pvalues_dataframe$test_method)
|
||||
```
|
||||
|
||||
```{r graphique_all_pvalues, echo = FALSE}
|
||||
## Graphiques
|
||||
ggplot(pvalues_dataframe) +
|
||||
aes(x = gene, y = pvalue, fill = test_method) +
|
||||
# TODO Faire les plots des pvalues dans l'ordre décroissant
|
||||
ggplot(pvalues_dataframe[pvalues_dataframe$Adj,]) +
|
||||
aes(x = gene, y = sort(pvalue, decreasing = TRUE), fill = test_method) +
|
||||
geom_bar(stat = "identity", position = "dodge") +
|
||||
facet_wrap(~test_method) +
|
||||
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
|
||||
|
|
@ -127,7 +142,7 @@ ggplot(pvalues_dataframe) +
|
|||
Ici on réalise un pivot_wider pour montrer les gènes sélectionnées par méthodes.
|
||||
|
||||
```{r wide_data, echo = FALSE}
|
||||
pvalues_dataframe_wide <- pvalues_dataframe %>%
|
||||
pvalues_dataframe_wide <- pvalues_dataframe[pvalues_dataframe$Adj,] %>%
|
||||
pivot_wider(id_cols = gene,
|
||||
names_from = test_method,
|
||||
values_from = selected) %>%
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
BIN
R/Method_on_realtree_files/figure-html/eve_upset-1.png
Normal file
BIN
R/Method_on_realtree_files/figure-html/eve_upset-1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 16 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 69 KiB |
BIN
R/Method_on_realtree_files/figure-html/upset_selection-1.png
Normal file
BIN
R/Method_on_realtree_files/figure-html/upset_selection-1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 21 KiB |
37
R/lrt_OU_eqv_EVEmodel.R
Normal file
37
R/lrt_OU_eqv_EVEmodel.R
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
require(phylotools)
|
||||
require(phytools)
|
||||
require(phylolm)
|
||||
require(limma)
|
||||
require(edgeR)
|
||||
require(here)
|
||||
require(ggplot2)
|
||||
require(dplyr)
|
||||
require(tidyr)
|
||||
|
||||
source("R/utils.R")
|
||||
|
||||
### Data import
|
||||
cdata <- readRDS(here("data", "data_TER", "data", "chen2019_rodents_cpd.rds"))
|
||||
is.valid <- compcodeR:::check_phyloCompData(cdata)
|
||||
if (!(is.valid == TRUE)) stop("Not a valid phyloCompData object.")
|
||||
|
||||
# Design
|
||||
design_formula <- as.formula(~condition)
|
||||
design_data <- compcodeR:::sample.annotations(cdata)[, "condition", drop = FALSE]
|
||||
design_data$condition <- factor(design_data$condition)
|
||||
design <- model.matrix(design_formula, design_data)
|
||||
|
||||
# Normalisation
|
||||
nf <- edgeR::calcNormFactors(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata), method = "TMM")
|
||||
lib.size <- colSums(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata)) * nf
|
||||
data.norm <- sweep((compcodeR:::count.matrix(cdata) + 0.5) / compcodeR:::length.matrix(cdata), 2, lib.size + 1, "/")
|
||||
data.norm <- data.norm * 1e6
|
||||
|
||||
# Transformation
|
||||
data.trans <- log2(data.norm)
|
||||
rownames(data.trans) <- rownames(compcodeR:::count.matrix(cdata))
|
||||
|
||||
# TODO phylolm(stoch_model = "OUfixedRoot")
|
||||
|
||||
fit_phylo <- phylolm(trait ~ design_data$condition, phy = cdata@tree, measurement_error = TRUE)
|
||||
compute_satterthwaite_pvalue(fit_phylo, tree = cdata@tree, return_df = TRUE)
|
||||
|
|
@ -53,9 +53,11 @@ pvalue_vec_vanilla_adj <- p.adjust(pvalue_vec_vanilla, method = "BH")
|
|||
pvalue_vec_satterthwaite <- sapply(seq(1, nrow(data.trans)), function(row_id) {
|
||||
trait <- data.trans[row_id, ]
|
||||
fit_phylo <- phylolm(trait ~ design_data$condition, phy = cdata@tree, measurement_error = TRUE)
|
||||
compute_satterthwaite_pvalue(fit_phylo, tree = cdata@tree)
|
||||
compute_satterthwaite_pvalue(fit_phylo, tree = cdata@tree, )
|
||||
})
|
||||
|
||||
# TODO Analyser l'origine de la surestimation du nombre de df2 pour le gène 1. Vient pê de sigma2_error ~ 1e-11
|
||||
|
||||
pvalue_vec_satterthwaite <- setNames(pvalue_vec_satterthwaite, rownames(data.trans))
|
||||
|
||||
pvalue_vec_satterthwaite_adj <- p.adjust(pvalue_vec_satterthwaite, method = "BH")
|
||||
|
|
|
|||
20
R/utils.R
20
R/utils.R
|
|
@ -160,6 +160,7 @@ ddf_satterthwaite_sum <- function(fit_phylolm, phylo, REML = FALSE) {
|
|||
return(list(ddf = ddf, vcov = A))
|
||||
}
|
||||
|
||||
# TODO Use phylolimma analytic hessian
|
||||
# Adapted from lmerTest
|
||||
# https://github.com/runehaubo/lmerTestR/blob/35dc5885205d709cdc395b369b08ca2b7273cb78/R/lmer.R#L173
|
||||
compute_hessian <- function(optpars, fun, grad_trans, tol = 1e-8, ...) {
|
||||
|
|
@ -262,7 +263,7 @@ is_invalid_value <- function(value) {
|
|||
#' @param fit_phylolm The phylolm fit for which to test
|
||||
#'
|
||||
#' @return pvalue
|
||||
compute_vanilla_pvalue <- function(fit_phylolm){
|
||||
compute_vanilla_pvalue <- function(fit_phylolm, return_df = FALSE){
|
||||
|
||||
# Extract parameters
|
||||
nb_species <- nrow(fit_phylolm$X)
|
||||
|
|
@ -278,7 +279,12 @@ compute_vanilla_pvalue <- function(fit_phylolm){
|
|||
df2 = df2
|
||||
)
|
||||
pvalue <- 1 - pf(F_stat, df1, df2)
|
||||
return(pvalue)
|
||||
if (!return_df) {
|
||||
return(pvalue)
|
||||
} else {
|
||||
return(list(pvalue = pvalue, df2 = df2))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#' Computes pvalue with Satterthwaite approximation for phylolm fit Fisher test
|
||||
|
|
@ -288,7 +294,7 @@ compute_vanilla_pvalue <- function(fit_phylolm){
|
|||
#' @param REML Use REML for computation
|
||||
#'
|
||||
#' @return pvalue
|
||||
compute_satterthwaite_pvalue <- function(fit_phylolm, tree, REML = FALSE){
|
||||
compute_satterthwaite_pvalue <- function(fit_phylolm, tree, REML = FALSE, return_df = FALSE){
|
||||
|
||||
# Extract parameters
|
||||
nb_species <- nrow(fit_phylolm$X)
|
||||
|
|
@ -307,7 +313,13 @@ compute_satterthwaite_pvalue <- function(fit_phylolm, tree, REML = FALSE){
|
|||
df2 = df2
|
||||
)
|
||||
pvalue <- 1 - pf(F_stat, df1, df2)
|
||||
return(pvalue)
|
||||
|
||||
if (!return_df) {
|
||||
return(pvalue)
|
||||
} else {
|
||||
return(list(pvalue = pvalue, df2 = df2))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#' Computes pvalue for phylolm fit likelihood ratio test
|
||||
|
|
|
|||
Binary file not shown.
Loading…
Add table
Reference in a new issue