anova-phylogenetique-projet.../data/data_TER/Analyse_Méthodes_compcodeR.Rmd

1202 lines
57 KiB
Text

---
title: "Anlayse appliquée sur les données réelles rodents"
output: html_notebook
---
```{r}
#Méthode EVE empirique
require(evemodel)
require(limma)
require(edgeR)
library(here)
cdata <- readRDS(here("data", "chen2019_rodents_cpd.rds"))
is.valid <- compcodeR:::check_phyloCompData(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData object.')
tree_rep <- compcodeR:::getTree(cdata)
tree_norep <- compcodeR:::getTreeEVE(cdata)
theta_2_vec <- compcodeR:::getIsTheta2edge(cdata, tree_norep)
# col_species <- tree_norep$tip.label[compcodeR:::sample.annotations(cdata)$id.species]
col_species <- tree_norep$tip.label[cumsum(!duplicated(compcodeR:::sample.annotations(cdata)$id.species))]
# Normalisation
nf <- edgeR::calcNormFactors(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata), method = 'TMM')
lib.size <- colSums(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata)) * nf
data.norm <- sweep((compcodeR:::count.matrix(cdata) + 0.5) / compcodeR:::length.matrix(cdata), 2, lib.size + 1, '/')
data.norm <- data.norm * 1e6
# Transformation
data.trans <- log2(data.norm)
rownames(data.trans) <- rownames(compcodeR:::count.matrix(cdata))
# Analysis with EVE
evemodel.results_list <- evemodel::twoThetaTest(tree = tree_norep, gene.data = data.trans, isTheta2edge = theta_2_vec, colSpecies = col_species, upperBound = c(theta = Inf, sigma2 = Inf, alpha = log(2)/0.001/1))
parEstim <- apply(evemodel.results_list$oneThetaRes$par, 2, median)
set.seed(1289)
nullData <- evemodel::simOneTheta(n = 100, tree = tree_norep, colSpecies = col_species, theta = parEstim["theta"], sigma2 = parEstim["sigma2"], alpha = parEstim["alpha"], beta = parEstim["beta"])
test.nullData_full <- evemodel::twoThetaTest(tree = tree_norep, gene.data = nullData, isTheta2edge = theta_2_vec, colSpecies = col_species)
emp_cff <- ecdf(test.nullData_full$LRT)
result.table <- data.frame(pvalue = 1 - emp_cff(evemodel.results_list$LRT), logFC = compcodeR:::getlogFCEVE(evemodel.results_list$twoThetaRes, theta_2_vec, tree_norep))
result.table$score <- 1 - result.table$pvalue
result.table$adjpvalue <- p.adjust(result.table$pvalue, 'BH')
# Save the results
rownames(result.table) <- rownames(compcodeR:::count.matrix(cdata))
compcodeR:::result.table(cdata) <- result.table
compcodeR:::package.version(cdata) <- paste('evemodel,', packageVersion('evemodel'))
compcodeR:::package.version(cdata) <- paste('edgeR,', packageVersion('edgeR'))
compcodeR:::analysis.date(cdata) <- date()
compcodeR:::method.names(cdata) <- list('short.name' = 'evetwotheta', 'full.name' = 'evemodel0.0.0.9008.TMM.lengthNorm.TPM.dataTrans.log2.empNull.TRUE.nGenesNull.100')
is.valid <- compcodeR:::check_compData_results(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData result object.')
saveRDS(cdata, '/Users/marinetognia/InterspeciesDEMethods/code/2023-05-16_2023-05-16_simulations_chen2019_results/real_tree_rodents_BM_with_lengths_3_0.8_1_1_evemodel_emp.TPM.log2.rds')
print(paste('Unique data set ID:', compcodeR:::info.parameters(cdata)$uID))
table(cdata@result.table$adjpvalue<0.05)
#Donnees incluant les gènes différentiellement exprimés pour la méthode evemodel_emp
# data_csv <- "data_evemodel_emp.csv"
# df <- data.frame(Genes = rownames(cdata@result.table)[cdata@result.table$adjpvalue<0.05] )
# write.csv(df, file = data_csv, row.names = FALSE)
# data_real <- "data_real"
#
# dir.create(data_real)
#
# if(file.exists(data_real)) {
# print("Le dossier a été créé.")
# } else {
# print("Une erreur s'est produite lors de la création du dossier.")
# }
# file.rename(from = "/Users/marinetognia/InterspeciesDEMethods/code/evemodel_emp.csv", to = "/Users/marinetognia/InterspeciesDEMethods/code/data_real/evemodel_emp.csv")
```
Il y a 404 gènes différentiellement exprimés pour la méthode evemodel_emp
NB : Les gènes dont la valeur p ajustée (adjpvalue) est inférieure à 0.05 sont considérés comme différentiellement exprimés, ce qui signifie qu'ils présentent des différences d'expression significatives par rapport aux autres gènes étudié.
```{r}
#Méthode EVE
require(evemodel)
require(limma)
require(edgeR)
library(here)
cdata <- readRDS(here("data", "chen2019_rodents_cpd.rds"))
is.valid <- compcodeR:::check_phyloCompData(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData object.')
tree_rep <- compcodeR:::getTree(cdata)
tree_norep <- compcodeR:::getTreeEVE(cdata)
theta_2_vec <- compcodeR:::getIsTheta2edge(cdata, tree_norep)
#col_species <- tree_norep$tip.label[compcodeR:::sample.annotations(cdata)$id.species]
col_species <- tree_norep$tip.label[cumsum(!duplicated(compcodeR:::sample.annotations(cdata)$id.species))]
# Normalisation
nf <- edgeR::calcNormFactors(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata), method = 'TMM')
lib.size <- colSums(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata)) * nf
data.norm <- sweep((compcodeR:::count.matrix(cdata) + 0.5) / compcodeR:::length.matrix(cdata), 2, lib.size + 1, '/')
data.norm <- data.norm * 1e6
# Transformation
data.trans <- log2(data.norm)
rownames(data.trans) <- rownames(compcodeR:::count.matrix(cdata))
# Analysis with EVE
evemodel.results_list <- evemodel::twoThetaTest(tree = tree_norep, gene.data = data.trans, isTheta2edge = theta_2_vec, colSpecies = col_species, upperBound = c(theta = Inf, sigma2 = Inf, alpha = log(2)/0.001/1))
result.table <- data.frame(pvalue = pchisq(evemodel.results_list$LRT, df = 1, lower.tail = FALSE), logFC = compcodeR:::getlogFCEVE(evemodel.results_list$twoThetaRes, theta_2_vec, tree_norep))
result.table$score <- 1 - result.table$pvalue
result.table$adjpvalue <- p.adjust(result.table$pvalue, 'BH')
# Save the results
rownames(result.table) <- rownames(compcodeR:::count.matrix(cdata))
compcodeR:::result.table(cdata) <- result.table
compcodeR:::package.version(cdata) <- paste('evemodel,', packageVersion('evemodel'))
compcodeR:::package.version(cdata) <- paste('edgeR,', packageVersion('edgeR'))
compcodeR:::analysis.date(cdata) <- date()
compcodeR:::method.names(cdata) <- list('short.name' = 'evetwotheta', 'full.name' = 'evemodel0.0.0.9008.TMM.lengthNorm.TPM.dataTrans.log2.empNull.FALSE')
is.valid <- compcodeR:::check_compData_results(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData result object.')
saveRDS(cdata, '/Users/marinetognia/InterspeciesDEMethods/code/2023-05-16_2023-05-16_simulations_chen2019_results/real_tree_rodents_BM_with_lengths_3_0.8_1_1_evemodel.TPM.log2.rds')
print(paste('Unique data set ID:', compcodeR:::info.parameters(cdata)$uID))
table(cdata@result.table$adjpvalue<0.05)
#Donnees incluant les gènes différentiellement exprimés pour la méthode evemodel
# data_csv <- "data_evemodel.csv"
# df <- data.frame(Genes = rownames(cdata@result.table)[cdata@result.table$adjpvalue<0.05] )
# write.csv(df, file = data_csv, row.names = FALSE)
# file.rename(from = "/Users/marinetognia/InterspeciesDEMethods/code/data_evemodel.csv", to = "/Users/marinetognia/InterspeciesDEMethods/code/data_real/data_evemodel.csv")
```
Il y a 158 gènes différentiellement exprimés pour la méthode evemodel.
```{r}
#Méthode Limma (paramètres : no trend, no blocks)
require(limma)
require(edgeR)
library(here)
cdata <- readRDS(here("data", "chen2019_rodents_cpd.rds"))
is.valid <- compcodeR:::check_phyloCompData(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData object.')
# Design
design_formula <- as.formula(~ condition)
design_data <- compcodeR:::sample.annotations(cdata)[, 'condition', drop = FALSE]
design_data$condition <- factor(design_data$condition)
design <- model.matrix(design_formula, design_data)
# Normalisation
nf <- edgeR::calcNormFactors(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata), method = 'TMM')
lib.size <- colSums(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata)) * nf
data.norm <- sweep((compcodeR:::count.matrix(cdata) + 0.5) / compcodeR:::length.matrix(cdata), 2, lib.size + 1, '/')
data.norm <- data.norm * 1e6
# Transformation
data.trans <- log2(data.norm)
rownames(data.trans) <- rownames(compcodeR:::count.matrix(cdata))
# Fit
length.fitlimma <- limma::lmFit(data.trans, design = design)
length.fitbayes <- limma::eBayes(length.fitlimma)
length.pvalues <- length.fitbayes$p.value[, ncol(length.fitbayes$p.value)]
length.adjpvalues <- p.adjust(length.pvalues, method = 'BH')
length.logFC <- length.fitbayes$coefficients[, ncol(length.fitbayes$coefficients)]
length.score <- 1 - length.pvalues
result.table <- data.frame('pvalue' = length.pvalues, 'adjpvalue' = length.adjpvalues, 'logFC' = length.logFC, 'score' = length.score)
rownames(result.table) <- rownames(compcodeR:::count.matrix(cdata))
compcodeR:::result.table(cdata) <- result.table
compcodeR:::package.version(cdata) <- paste('limma,', packageVersion('limma'), ';', 'edgeR,', packageVersion('edgeR'))
compcodeR:::analysis.date(cdata) <- date()
compcodeR:::method.names(cdata) <- list('short.name' = 'lengthNorm.limma', 'full.name' = 'length.3.54.2.limma.TMM.lengthNorm.TPM.dataTrans.log2.no_trend')
is.valid <- compcodeR:::check_compData_results(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData result object.')
saveRDS(cdata, '/Users/marinetognia/InterspeciesDEMethods/code/2023-05-16_2023-05-16_simulations_chen2019_results/real_tree_rodents_BM_with_lengths_3_0.8_1_1_lengthNorm.limma.TPM.log2.no_trend.no_blocks.rds')
print(paste('Unique data set ID:', compcodeR:::info.parameters(cdata)$uID))
table(cdata@result.table$adjpvalue<0.05)
#Donnees incluant les gènes différentiellement exprimés pour la méthode limma.no_trend.no_blocks
# data_csv <- "data_limma.no_trend.no_blocks .csv"
# df <- data.frame(Genes = rownames(cdata@result.table)[cdata@result.table$adjpvalue<0.05] )
# write.csv(df, file = data_csv, row.names = FALSE)
# file.rename(from = "/Users/marinetognia/InterspeciesDEMethods/code/data_limma.no_trend.no_blocks .csv", to = "/Users/marinetognia/InterspeciesDEMethods/code/data_real/data_limma.no_trend.no_blocks.csv")
```
Il y a 3916 gènes différentiellement exprimés pour la méthode limma.no_trend.no_blocks
```{r}
#Méthode Limma (paramètres : no trend, with blocks)
require(limma)
require(edgeR)
library(here)
cdata <- readRDS(here("data", "chen2019_rodents_cpd.rds"))
is.valid <- compcodeR:::check_phyloCompData(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData object.')
# Design
design_formula <- as.formula(~ condition)
design_data <- compcodeR:::sample.annotations(cdata)[, 'condition', drop = FALSE]
design_data$condition <- factor(design_data$condition)
design <- model.matrix(design_formula, design_data)
# Normalisation
nf <- edgeR::calcNormFactors(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata), method = 'TMM')
lib.size <- colSums(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata)) * nf
data.norm <- sweep((compcodeR:::count.matrix(cdata) + 0.5) / compcodeR:::length.matrix(cdata), 2, lib.size + 1, '/')
data.norm <- data.norm * 1e6
# Transformation
data.trans <- log2(data.norm)
rownames(data.trans) <- rownames(compcodeR:::count.matrix(cdata))
# Fitting Block correlations
block <- compcodeR:::sample.annotations(cdata)[['id.species']]
corfit <- duplicateCorrelation(data.trans, design = design, block = block, ndups = 1)
# Fit
length.fitlimma <- limma::lmFit(data.trans, design = design, correlation = corfit$consensus, block = block)
length.fitbayes <- limma::eBayes(length.fitlimma)
length.pvalues <- length.fitbayes$p.value[, ncol(length.fitbayes$p.value)]
length.adjpvalues <- p.adjust(length.pvalues, method = 'BH')
length.logFC <- length.fitbayes$coefficients[, ncol(length.fitbayes$coefficients)]
length.score <- 1 - length.pvalues
result.table <- data.frame('pvalue' = length.pvalues, 'adjpvalue' = length.adjpvalues, 'logFC' = length.logFC, 'score' = length.score)
rownames(result.table) <- rownames(compcodeR:::count.matrix(cdata))
compcodeR:::result.table(cdata) <- result.table
compcodeR:::package.version(cdata) <- paste('limma,', packageVersion('limma'), ';', 'edgeR,', packageVersion('edgeR'))
compcodeR:::analysis.date(cdata) <- date()
compcodeR:::method.names(cdata) <- list('short.name' = 'lengthNorm.limma', 'full.name' = 'length.3.54.2.limma.TMM.lengthNorm.TPM.dataTrans.log2.no_trend')
is.valid <- compcodeR:::check_compData_results(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData result object.')
saveRDS(cdata, '/Users/marinetognia/InterspeciesDEMethods/code/2023-05-16_2023-05-16_simulations_chen2019_results/real_tree_rodents_BM_with_lengths_3_0.8_1_1_lengthNorm.limma.TPM.log2.no_trend.with_blocks.rds')
print(paste('Unique data set ID:', compcodeR:::info.parameters(cdata)$uID))
table(cdata@result.table$adjpvalue<0.05)
#Donnees incluant les gènes différentiellement exprimés pour la méthode limma.no_trend.with_blocks
# data_csv <- "data_limma.no_trend.with_blocks.csv"
# df <- data.frame(Genes = rownames(cdata@result.table)[cdata@result.table$adjpvalue<0.05] )
# write.csv(df, file = data_csv, row.names = FALSE)
# file.rename(from = "/Users/marinetognia/InterspeciesDEMethods/code/data_limma.no_trend.with_blocks.csv", to = "/Users/marinetognia/InterspeciesDEMethods/code/data_real/data_limma.no_trend.with_blocks.csv")
```
Il y a 1515 gènes différentiellement exprimés pour la méthode limma.no_trend.with_blocks
```{r}
#Méthode Limma ( with Trend, no blocks)
require(limma)
require(edgeR)
library(here)
cdata <- readRDS(here("data", "chen2019_rodents_cpd.rds"))
is.valid <- compcodeR:::check_phyloCompData(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData object.')
# Design
design_formula <- as.formula(~ condition)
design_data <- compcodeR:::sample.annotations(cdata)[, 'condition', drop = FALSE]
design_data$condition <- factor(design_data$condition)
design <- model.matrix(design_formula, design_data)
# Normalisation
nf <- edgeR::calcNormFactors(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata), method = 'TMM')
lib.size <- colSums(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata)) * nf
data.norm <- sweep((compcodeR:::count.matrix(cdata) + 0.5) / compcodeR:::length.matrix(cdata), 2, lib.size + 1, '/')
data.norm <- data.norm * 1e6
# Transformation
data.trans <- log2(data.norm)
rownames(data.trans) <- rownames(compcodeR:::count.matrix(cdata))
# Fit
length.fitlimma <- limma::lmFit(data.trans, design = design)
length.fitbayes <- limma::eBayes(length.fitlimma, trend = TRUE)
length.pvalues <- length.fitbayes$p.value[, ncol(length.fitbayes$p.value)]
length.adjpvalues <- p.adjust(length.pvalues, method = 'BH')
length.logFC <- length.fitbayes$coefficients[, ncol(length.fitbayes$coefficients)]
length.score <- 1 - length.pvalues
result.table <- data.frame('pvalue' = length.pvalues, 'adjpvalue' = length.adjpvalues, 'logFC' = length.logFC, 'score' = length.score)
rownames(result.table) <- rownames(compcodeR:::count.matrix(cdata))
compcodeR:::result.table(cdata) <- result.table
compcodeR:::package.version(cdata) <- paste('limma,', packageVersion('limma'), ';', 'edgeR,', packageVersion('edgeR'))
compcodeR:::analysis.date(cdata) <- date()
compcodeR:::method.names(cdata) <- list('short.name' = 'lengthNorm.limma', 'full.name' = 'length.3.54.2.limma.TMM.lengthNorm.TPM.dataTrans.log2.with_trend')
is.valid <- compcodeR:::check_compData_results(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData result object.')
saveRDS(cdata, '/Users/marinetognia/InterspeciesDEMethods/code/2023-05-16_2023-05-16_simulations_chen2019_results/real_tree_rodents_BM_with_lengths_3_0.8_1_1_lengthNorm.limma.TPM.log2.with_trend.no_blocks.rds')
print(paste('Unique data set ID:', compcodeR:::info.parameters(cdata)$uID))
table(cdata@result.table$adjpvalue<0.05)
#Donnees incluant les gènes différentiellement exprimés pour la méthode limma.with_trend.no_blocks
# data_csv <- "data_limma.with_trend.no_blocks.csv"
# df <- data.frame(Genes = rownames(cdata@result.table)[cdata@result.table$adjpvalue<0.05] )
# write.csv(df, file = data_csv, row.names = FALSE)
# file.rename(from = "/Users/marinetognia/InterspeciesDEMethods/code/data_limma.with_trend.no_blocks.csv", to = "/Users/marinetognia/InterspeciesDEMethods/code/data_real/data_limma.with_trend.no_blocks.csv")
```
Il y a 3924 gènes différentiellement exprimés pour la méthode limma.with_trend.no_blocks
```{r}
#Méthode Limma (with trend, with blocks)
require(limma)
require(edgeR)
library(here)
cdata <- readRDS(here("data", "chen2019_rodents_cpd.rds"))
is.valid <- compcodeR:::check_phyloCompData(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData object.')
# Design
design_formula <- as.formula(~ condition)
design_data <- compcodeR:::sample.annotations(cdata)[, 'condition', drop = FALSE]
design_data$condition <- factor(design_data$condition)
design <- model.matrix(design_formula, design_data)
# Normalisation
nf <- edgeR::calcNormFactors(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata), method = 'TMM')
lib.size <- colSums(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata)) * nf
data.norm <- sweep((compcodeR:::count.matrix(cdata) + 0.5) / compcodeR:::length.matrix(cdata), 2, lib.size + 1, '/')
data.norm <- data.norm * 1e6
# Transformation
data.trans <- log2(data.norm)
rownames(data.trans) <- rownames(compcodeR:::count.matrix(cdata))
# Fitting Block correlations
block <- compcodeR:::sample.annotations(cdata)[['id.species']]
corfit <- duplicateCorrelation(data.trans, design = design, block = block, ndups = 1)
# Fit
length.fitlimma <- limma::lmFit(data.trans, design = design, correlation = corfit$consensus, block = block)
length.fitbayes <- limma::eBayes(length.fitlimma, trend = TRUE)
length.pvalues <- length.fitbayes$p.value[, ncol(length.fitbayes$p.value)]
length.adjpvalues <- p.adjust(length.pvalues, method = 'BH')
length.logFC <- length.fitbayes$coefficients[, ncol(length.fitbayes$coefficients)]
length.score <- 1 - length.pvalues
result.table <- data.frame('pvalue' = length.pvalues, 'adjpvalue' = length.adjpvalues, 'logFC' = length.logFC, 'score' = length.score)
rownames(result.table) <- rownames(compcodeR:::count.matrix(cdata))
compcodeR:::result.table(cdata) <- result.table
compcodeR:::package.version(cdata) <- paste('limma,', packageVersion('limma'), ';', 'edgeR,', packageVersion('edgeR'))
compcodeR:::analysis.date(cdata) <- date()
compcodeR:::method.names(cdata) <- list('short.name' = 'lengthNorm.limma', 'full.name' = 'length.3.54.2.limma.TMM.lengthNorm.TPM.dataTrans.log2.with_trend.id.species')
is.valid <- compcodeR:::check_compData_results(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData result object.')
saveRDS(cdata, '/Users/marinetognia/InterspeciesDEMethods/code/2023-05-16_2023-05-16_simulations_chen2019_results/real_tree_rodents_BM_with_lengths_3_0.8_1_1_lengthNorm.limma.TPM.log2.with_trend.with_blocks.rds')
print(paste('Unique data set ID:', compcodeR:::info.parameters(cdata)$uID))
table(cdata@result.table$adjpvalue<0.05)
#Donnees incluant les gènes différentiellement exprimés pour la méthode limma.with_trend.with_blocks
# data_csv <- "data_limma.with_trend.with_blocks.csv"
# df <- data.frame(Genes = rownames(cdata@result.table)[cdata@result.table$adjpvalue<0.05] )
# write.csv(df, file = data_csv, row.names = FALSE)
# file.rename(from = "/Users/marinetognia/InterspeciesDEMethods/code/data_limma.with_trend.with_blocks.csv", to = "/Users/marinetognia/InterspeciesDEMethods/code/data_real/data_limma.with_trend.with_blocks.csv")
```
Il y a 1542 gènes différentiellement exprimés pour la méthode limma.with_trend.with_blocks
```{r}
#Méthode Phylolimma (Paramètres : no trend, regCor FALSE, BM)
#install.packages("remotes")
#remotes::install_github("pbastide/phylolimma")
require(phylolimma)
require(edgeR)
library(here)
cdata <- readRDS(here("data", "chen2019_rodents_cpd.rds"))
is.valid <- compcodeR:::check_phyloCompData(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData object.')
# Design
design_formula <- as.formula(~ condition)
design_data <- compcodeR:::sample.annotations(cdata)[, 'condition', drop = FALSE]
design_data$condition <- factor(design_data$condition)
design <- model.matrix(design_formula, design_data)
# Normalisation
nf <- edgeR::calcNormFactors(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata), method = 'TMM')
data.trans <- phylolimma::lengthNormalizeRNASeq(compcodeR:::count.matrix(cdata), compcodeR:::length.matrix(cdata), normalisationFactor = nf, lengthNormalization = 'TPM', dataTransformation = 'log2')
# Fit
tree <- compcodeR:::getTree(cdata)
length.fitlimma <- phylolimma::phylolmFit(data.trans, design = design, phy = tree, model = 'BM', measurement_error = TRUE, use_consensus = FALSE,)
length.fitbayes <- limma::eBayes(length.fitlimma)
length.pvalues <- length.fitbayes$p.value[, ncol(length.fitbayes$p.value)]
length.logFC <- length.fitbayes$coefficients[, ncol(length.fitbayes$coefficients)]
length.adjpvalues <- p.adjust(length.pvalues, method = 'BH')
length.score <- 1 - length.pvalues
result.table <- data.frame('pvalue' = length.pvalues, 'adjpvalue' = length.adjpvalues, 'logFC' = length.logFC, 'score' = length.score)
rownames(result.table) <- rownames(compcodeR:::count.matrix(cdata))
compcodeR:::result.table(cdata) <- result.table
compcodeR:::package.version(cdata) <- paste('phylolimma,', packageVersion('phylolimma'), ';', 'edgeR,', packageVersion('edgeR'))
compcodeR:::analysis.date(cdata) <- date()
compcodeR:::method.names(cdata) <- list('short.name' = 'phylolimma', 'full.name' = 'phylolimma0.1.03.38.4.TMM.BM.me.lengthNorm.TPM.dataTrans.log2.moderation.eBayes.no_trend.nonregularized_correlation')
is.valid <- compcodeR:::check_compData_results(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData result object.')
saveRDS(cdata, '/Users/marinetognia/InterspeciesDEMethods/code/2023-05-16_2023-05-16_simulations_chen2019_results/real_tree_rodents_BM_with_lengths_3_0.8_1_1_phylolimma.TPM.log2.BM.no_trend.regCor_FALSE.rds')
print(paste('Unique data set ID:', compcodeR:::info.parameters(cdata)$uID))
table(cdata@result.table$adjpvalue<0.05)
#Donnees incluant les gènes différentiellement exprimés pour la méthode phylolimma.no_trend.regCor_FALSE
# data_csv <- "data_phylolimma.no_trend.regCor_FALSE.csv"
# df <- data.frame(Genes = rownames(cdata@result.table)[cdata@result.table$adjpvalue<0.05] )
# write.csv(df, file = data_csv, row.names = FALSE)
# file.rename(from = "/Users/marinetognia/InterspeciesDEMethods/code/data_phylolimma.no_trend.regCor_FALSE.csv", to = "/Users/marinetognia/InterspeciesDEMethods/code/data_real/data_phylolimma.no_trend.regCor_FALSE.csv")
```
Il y a 1655 gènes différentiellement exprimés pour la méthode phylolimma.no_trend.regCor_FALSE
```{r}
#Méthode Phylolimma (Paramètres : no trend, regCor True,BM)
require(phylolimma)
require(edgeR)
library(here)
cdata <- readRDS(here("data", "chen2019_rodents_cpd.rds"))
is.valid <- compcodeR:::check_phyloCompData(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData object.')
# Design
design_formula <- as.formula(~ condition)
design_data <- compcodeR:::sample.annotations(cdata)[, 'condition', drop = FALSE]
design_data$condition <- factor(design_data$condition)
design <- model.matrix(design_formula, design_data)
# Normalisation
nf <- edgeR::calcNormFactors(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata), method = 'TMM')
data.trans <- phylolimma::lengthNormalizeRNASeq(compcodeR:::count.matrix(cdata), compcodeR:::length.matrix(cdata), normalisationFactor = nf, lengthNormalization = 'TPM', dataTransformation = 'log2')
# Fit
tree <- compcodeR:::getTree(cdata)
length.fitlimma <- phylolimma::phylolmFit(data.trans, design = design, phy = tree, model = 'BM', measurement_error = TRUE, use_consensus = TRUE,)
length.fitbayes <- limma::eBayes(length.fitlimma)
length.pvalues <- length.fitbayes$p.value[, ncol(length.fitbayes$p.value)]
length.logFC <- length.fitbayes$coefficients[, ncol(length.fitbayes$coefficients)]
length.adjpvalues <- p.adjust(length.pvalues, method = 'BH')
length.score <- 1 - length.pvalues
result.table <- data.frame('pvalue' = length.pvalues, 'adjpvalue' = length.adjpvalues, 'logFC' = length.logFC, 'score' = length.score)
rownames(result.table) <- rownames(compcodeR:::count.matrix(cdata))
compcodeR:::result.table(cdata) <- result.table
compcodeR:::package.version(cdata) <- paste('phylolimma,', packageVersion('phylolimma'), ';', 'edgeR,', packageVersion('edgeR'))
compcodeR:::analysis.date(cdata) <- date()
compcodeR:::method.names(cdata) <- list('short.name' = 'phylolimma', 'full.name' = 'phylolimma0.1.03.38.4.TMM.BM.me.lengthNorm.TPM.dataTrans.log2.moderation.eBayes.no_trend.regularized_correlation')
is.valid <- compcodeR:::check_compData_results(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData result object.')
saveRDS(cdata, '/Users/marinetognia/InterspeciesDEMethods/code/2023-05-16_2023-05-16_simulations_chen2019_results/real_tree_rodents_BM_with_lengths_3_0.8_1_1_phylolimma.TPM.log2.BM.no_trend.regCor_TRUE.rds')
print(paste('Unique data set ID:', compcodeR:::info.parameters(cdata)$uID))
table(cdata@result.table$adjpvalue<0.05)
#Donnees incluant les gènes différentiellement exprimés pour la méthode phylolimma.no_trend.regCor_TRUE
# data_csv <- "data_phylolimma.no_trend.regCor_TRUE.csv"
# df <- data.frame(Genes = rownames(cdata@result.table)[cdata@result.table$adjpvalue<0.05] )
# write.csv(df, file = data_csv, row.names = FALSE)
# file.rename(from = "/Users/marinetognia/InterspeciesDEMethods/code/data_phylolimma.no_trend.regCor_TRUE.csv", to = "/Users/marinetognia/InterspeciesDEMethods/code/data_real/data_phylolimma.no_trend.regCor_TRUE.csv")
```
Il y a 207 gènes différentiellement exprimés phylolimma.no_trend.regCor_TRUE
```{r}
#Méthode Phylolimma (Paramètres : with trend, regCor False,BM)
require(phylolimma)
require(edgeR)
library(here)
cdata <- readRDS(here("data", "chen2019_rodents_cpd.rds"))
is.valid <- compcodeR:::check_phyloCompData(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData object.')
# Design
design_formula <- as.formula(~ condition)
design_data <- compcodeR:::sample.annotations(cdata)[, 'condition', drop = FALSE]
design_data$condition <- factor(design_data$condition)
design <- model.matrix(design_formula, design_data)
# Normalisation
nf <- edgeR::calcNormFactors(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata), method = 'TMM')
data.trans <- phylolimma::lengthNormalizeRNASeq(compcodeR:::count.matrix(cdata), compcodeR:::length.matrix(cdata), normalisationFactor = nf, lengthNormalization = 'TPM', dataTransformation = 'log2')
# Fit
tree <- compcodeR:::getTree(cdata)
length.fitlimma <- phylolimma::phylolmFit(data.trans, design = design, phy = tree, model = 'BM', measurement_error = TRUE, use_consensus = FALSE,)
length.fitbayes <- limma::eBayes(length.fitlimma, trend = TRUE)
length.pvalues <- length.fitbayes$p.value[, ncol(length.fitbayes$p.value)]
length.logFC <- length.fitbayes$coefficients[, ncol(length.fitbayes$coefficients)]
length.adjpvalues <- p.adjust(length.pvalues, method = 'BH')
length.score <- 1 - length.pvalues
result.table <- data.frame('pvalue' = length.pvalues, 'adjpvalue' = length.adjpvalues, 'logFC' = length.logFC, 'score' = length.score)
rownames(result.table) <- rownames(compcodeR:::count.matrix(cdata))
compcodeR:::result.table(cdata) <- result.table
compcodeR:::package.version(cdata) <- paste('phylolimma,', packageVersion('phylolimma'), ';', 'edgeR,', packageVersion('edgeR'))
compcodeR:::analysis.date(cdata) <- date()
compcodeR:::method.names(cdata) <- list('short.name' = 'phylolimma', 'full.name' = 'phylolimma0.1.03.38.4.TMM.BM.me.lengthNorm.TPM.dataTrans.log2.moderation.eBayes.with_trend.nonregularized_correlation')
is.valid <- compcodeR:::check_compData_results(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData result object.')
saveRDS(cdata, '/Users/marinetognia/InterspeciesDEMethods/code/2023-05-16_2023-05-16_simulations_chen2019_results/real_tree_rodents_BM_with_lengths_3_0.8_1_1_phylolimma.TPM.log2.BM.with_trend.regCor_FALSE.rds')
print(paste('Unique data set ID:', compcodeR:::info.parameters(cdata)$uID))
table(cdata@result.table$adjpvalue<0.05)
#Donnees incluant les gènes différentiellement exprimés pour la méthode phylolimma.with_trend.regCor_FALSE
# data_csv <- "data_phylolimma.with_trend.regCor_FALSE.csv"
# df <- data.frame(Genes = rownames(cdata@result.table)[cdata@result.table$adjpvalue<0.05] )
# write.csv(df, file = data_csv, row.names = FALSE)
# file.rename(from = "/Users/marinetognia/InterspeciesDEMethods/code/data_phylolimma.with_trend.regCor_FALSE.csv", to = "/Users/marinetognia/InterspeciesDEMethods/code/data_real/data_phylolimma.with_trend.regCor_FALSE.csv")
```
Il y a 1690 gènes différentiellement exprimés pour la méthode phylolimma.with_trend.regCor_FALSE
```{r}
#Méthode Phylolimma (Paramètres : with trend, regCor TRUE,BM)
require(phylolimma)
require(edgeR)
library(here)
cdata <- readRDS(here("data", "chen2019_rodents_cpd.rds"))
is.valid <- compcodeR:::check_phyloCompData(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData object.')
# Design
design_formula <- as.formula(~ condition)
design_data <- compcodeR:::sample.annotations(cdata)[, 'condition', drop = FALSE]
design_data$condition <- factor(design_data$condition)
design <- model.matrix(design_formula, design_data)
# Normalisation
nf <- edgeR::calcNormFactors(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata), method = 'TMM')
data.trans <- phylolimma::lengthNormalizeRNASeq(compcodeR:::count.matrix(cdata), compcodeR:::length.matrix(cdata), normalisationFactor = nf, lengthNormalization = 'TPM', dataTransformation = 'log2')
# Fit
tree <- compcodeR:::getTree(cdata)
length.fitlimma <- phylolimma::phylolmFit(data.trans, design = design, phy = tree, model = 'BM', measurement_error = TRUE, use_consensus = TRUE,)
length.fitbayes <- limma::eBayes(length.fitlimma, trend = TRUE)
length.pvalues <- length.fitbayes$p.value[, ncol(length.fitbayes$p.value)]
length.logFC <- length.fitbayes$coefficients[, ncol(length.fitbayes$coefficients)]
length.adjpvalues <- p.adjust(length.pvalues, method = 'BH')
length.score <- 1 - length.pvalues
result.table <- data.frame('pvalue' = length.pvalues, 'adjpvalue' = length.adjpvalues, 'logFC' = length.logFC, 'score' = length.score)
rownames(result.table) <- rownames(compcodeR:::count.matrix(cdata))
compcodeR:::result.table(cdata) <- result.table
compcodeR:::package.version(cdata) <- paste('phylolimma,', packageVersion('phylolimma'), ';', 'edgeR,', packageVersion('edgeR'))
compcodeR:::analysis.date(cdata) <- date()
compcodeR:::method.names(cdata) <- list('short.name' = 'phylolimma', 'full.name' = 'phylolimma0.1.03.38.4.TMM.BM.me.lengthNorm.TPM.dataTrans.log2.moderation.eBayes.with_trend.regularized_correlation')
is.valid <- compcodeR:::check_compData_results(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData result object.')
saveRDS(cdata, '/Users/marinetognia/InterspeciesDEMethods/code/2023-05-16_2023-05-16_simulations_chen2019_results/real_tree_rodents_BM_with_lengths_3_0.8_1_1_phylolimma.TPM.log2.BM.with_trend.regCor_TRUE.rds')
print(paste('Unique data set ID:', compcodeR:::info.parameters(cdata)$uID))
table(cdata@result.table$adjpvalue<0.05)
#Donnees incluant les gènes différentiellement exprimés pour la méthode phylolimma.with_trend.regCor_TRUE
# data_csv <- "data_phylolimma.with_trend.regCor_TRUE.csv"
# df <- data.frame(Genes = rownames(cdata@result.table)[cdata@result.table$adjpvalue<0.05] )
# write.csv(df, file = data_csv, row.names = FALSE)
# file.rename(from = "/Users/marinetognia/InterspeciesDEMethods/code/data_phylolimma.with_trend.regCor_TRUE.csv", to = "/Users/marinetognia/InterspeciesDEMethods/code/data_real/data_phylolimma.with_trend.regCor_TRUE.csv")
```
Il y a 228 gènes différentiellement exprimés pour la méthode phylolimma.with_trend.regCor_TRUE
```{r}
#Méthode Phylolimma (Paramètres : no trend, regCor False, OU)
require(phylolimma)
require(edgeR)
library(here)
cdata <- readRDS(here("data", "chen2019_rodents_cpd.rds"))
is.valid <- compcodeR:::check_phyloCompData(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData object.')
# Design
design_formula <- as.formula(~ condition)
design_data <- compcodeR:::sample.annotations(cdata)[, 'condition', drop = FALSE]
design_data$condition <- factor(design_data$condition)
design <- model.matrix(design_formula, design_data)
# Normalisation
nf <- edgeR::calcNormFactors(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata), method = 'TMM')
data.trans <- phylolimma::lengthNormalizeRNASeq(compcodeR:::count.matrix(cdata), compcodeR:::length.matrix(cdata), normalisationFactor = nf, lengthNormalization = 'TPM', dataTransformation = 'log2')
# Fit
tree <- compcodeR:::getTree(cdata)
length.fitlimma <- phylolimma::phylolmFit(data.trans, design = design, phy = tree, model = 'OUfixedRoot', measurement_error = TRUE, use_consensus = FALSE,)
length.fitbayes <- limma::eBayes(length.fitlimma)
length.pvalues <- length.fitbayes$p.value[, ncol(length.fitbayes$p.value)]
length.logFC <- length.fitbayes$coefficients[, ncol(length.fitbayes$coefficients)]
length.adjpvalues <- p.adjust(length.pvalues, method = 'BH')
length.score <- 1 - length.pvalues
result.table <- data.frame('pvalue' = length.pvalues, 'adjpvalue' = length.adjpvalues, 'logFC' = length.logFC, 'score' = length.score)
rownames(result.table) <- rownames(compcodeR:::count.matrix(cdata))
compcodeR:::result.table(cdata) <- result.table
compcodeR:::package.version(cdata) <- paste('phylolimma,', packageVersion('phylolimma'), ';', 'edgeR,', packageVersion('edgeR'))
compcodeR:::analysis.date(cdata) <- date()
compcodeR:::method.names(cdata) <- list('short.name' = 'phylolimma', 'full.name' = 'phylolimma0.1.03.38.4.TMM.BM.me.lengthNorm.TPM.dataTrans.log2.moderation.eBayes.no_trend.regularized_correlation')
is.valid <- compcodeR:::check_compData_results(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData result object.')
saveRDS(cdata, '/Users/marinetognia/InterspeciesDEMethods/code/2023-05-16_2023-05-16_simulations_chen2019_results/real_tree_rodents_BM_with_lengths_3_0.8_1_1_phylolimma.TPM.log2.OUfixedRoot.no_trend.regCor_FALSE.rds')
print(paste('Unique data set ID:', compcodeR:::info.parameters(cdata)$uID))
table(cdata@result.table$adjpvalue<0.05)
#Donnees incluant les gènes différentiellement exprimés pour la méthode phylolimma.OUfixedRoot.no_trend.regCor_FALSE
# data_csv <- "data_phylolimma.OUfixedRoot.no_trend.regCor_FALSE.csv"
# df <- data.frame(Genes = rownames(cdata@result.table)[cdata@result.table$adjpvalue<0.05] )
# write.csv(df, file = data_csv, row.names = FALSE)
# file.rename(from = "/Users/marinetognia/InterspeciesDEMethods/code/data_phylolimma.OUfixedRoot.no_trend.regCor_FALSE.csv", to = "/Users/marinetognia/InterspeciesDEMethods/code/data_real/data_phylolimma.OUfixedRoot.no_trend.regCor_FALSE.csv")
```
Il y a 1726 gènes différentiellement exprimés pour la méthode phylolimma.OUfixedRoot.no_trend.regCor_FALSE
```{r}
#Méthode Phylolimma (Paramètres : no trend, regCor True, OU)
require(phylolimma)
require(edgeR)
library(here)
cdata <- readRDS(here("data", "chen2019_rodents_cpd.rds"))
is.valid <- compcodeR:::check_phyloCompData(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData object.')
# Design
design_formula <- as.formula(~ condition)
design_data <- compcodeR:::sample.annotations(cdata)[, 'condition', drop = FALSE]
design_data$condition <- factor(design_data$condition)
design <- model.matrix(design_formula, design_data)
# Normalisation
nf <- edgeR::calcNormFactors(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata), method = 'TMM')
data.trans <- phylolimma::lengthNormalizeRNASeq(compcodeR:::count.matrix(cdata), compcodeR:::length.matrix(cdata), normalisationFactor = nf, lengthNormalization = 'TPM', dataTransformation = 'log2')
# Fit
tree <- compcodeR:::getTree(cdata)
length.fitlimma <- phylolimma::phylolmFit(data.trans, design = design, phy = tree, model = 'OUfixedRoot', measurement_error = TRUE, use_consensus = TRUE,)
length.fitbayes <- limma::eBayes(length.fitlimma)
length.pvalues <- length.fitbayes$p.value[, ncol(length.fitbayes$p.value)]
length.logFC <- length.fitbayes$coefficients[, ncol(length.fitbayes$coefficients)]
length.adjpvalues <- p.adjust(length.pvalues, method = 'BH')
length.score <- 1 - length.pvalues
result.table <- data.frame('pvalue' = length.pvalues, 'adjpvalue' = length.adjpvalues, 'logFC' = length.logFC, 'score' = length.score)
rownames(result.table) <- rownames(compcodeR:::count.matrix(cdata))
compcodeR:::result.table(cdata) <- result.table
compcodeR:::package.version(cdata) <- paste('phylolimma,', packageVersion('phylolimma'), ';', 'edgeR,', packageVersion('edgeR'))
compcodeR:::analysis.date(cdata) <- date()
compcodeR:::method.names(cdata) <- list('short.name' = 'phylolimma', 'full.name' = 'phylolimma0.1.03.38.4.TMM.OUfixedRoot.me.lengthNorm.TPM.dataTrans.log2.moderation.eBayes.no_trend.regularized_correlation')
is.valid <- compcodeR:::check_compData_results(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData result object.')
saveRDS(cdata, '/Users/marinetognia/InterspeciesDEMethods/code/2023-05-16_2023-05-16_simulations_chen2019_results/real_tree_rodents_BM_with_lengths_3_0.8_1_1_phylolimma.TPM.log2.OUfixedRoot.no_trend.regCor_TRUE.rds')
print(paste('Unique data set ID:', compcodeR:::info.parameters(cdata)$uID))
table(cdata@result.table$adjpvalue<0.05)
#Donnees incluant les gènes différentiellement exprimés pour la méthode phylolimma.OUfixedRoot.no_trend.regCor_TRUE
# data_csv <- "data_phylolimma.OUfixedRoot.no_trend.regCor_TRUE.csv"
# df <- data.frame(Genes = rownames(cdata@result.table)[cdata@result.table$adjpvalue<0.05] )
# write.csv(df, file = data_csv, row.names = FALSE)
# file.rename(from = "/Users/marinetognia/InterspeciesDEMethods/code/data_phylolimma.OUfixedRoot.no_trend.regCor_TRUE.csv", to = "/Users/marinetognia/InterspeciesDEMethods/code/data_real/data_phylolimma.OUfixedRoot.no_trend.regCor_TRUE.csv")
```
Il y a 1730 gènes différentiellement exprimés pour la méthode phylolimma.OUfixedRoot.no_trend.regCor_TRUE
```{r}
#Méthode Phylolimma (Paramètres : with trend, regCor False, OU )
require(phylolimma)
require(edgeR)
library(here)
cdata <- readRDS(here("data", "chen2019_rodents_cpd.rds"))
is.valid <- compcodeR:::check_phyloCompData(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData object.')
# Design
design_formula <- as.formula(~ condition)
design_data <- compcodeR:::sample.annotations(cdata)[, 'condition', drop = FALSE]
design_data$condition <- factor(design_data$condition)
design <- model.matrix(design_formula, design_data)
# Normalisation
nf <- edgeR::calcNormFactors(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata), method = 'TMM')
data.trans <- phylolimma::lengthNormalizeRNASeq(compcodeR:::count.matrix(cdata), compcodeR:::length.matrix(cdata), normalisationFactor = nf, lengthNormalization = 'TPM', dataTransformation = 'log2')
# Fit
tree <- compcodeR:::getTree(cdata)
length.fitlimma <- phylolimma::phylolmFit(data.trans, design = design, phy = tree, model = 'OUfixedRoot', measurement_error = TRUE, use_consensus = FALSE,)
length.fitbayes <- limma::eBayes(length.fitlimma, trend = TRUE)
length.pvalues <- length.fitbayes$p.value[, ncol(length.fitbayes$p.value)]
length.logFC <- length.fitbayes$coefficients[, ncol(length.fitbayes$coefficients)]
length.adjpvalues <- p.adjust(length.pvalues, method = 'BH')
length.score <- 1 - length.pvalues
result.table <- data.frame('pvalue' = length.pvalues, 'adjpvalue' = length.adjpvalues, 'logFC' = length.logFC, 'score' = length.score)
rownames(result.table) <- rownames(compcodeR:::count.matrix(cdata))
compcodeR:::result.table(cdata) <- result.table
compcodeR:::package.version(cdata) <- paste('phylolimma,', packageVersion('phylolimma'), ';', 'edgeR,', packageVersion('edgeR'))
compcodeR:::analysis.date(cdata) <- date()
compcodeR:::method.names(cdata) <- list('short.name' = 'phylolimma', 'full.name' = 'phylolimma0.1.03.38.4.TMM.OUfixedRoot.me.lengthNorm.TPM.dataTrans.log2.moderation.eBayes.with_trend.nonregularized_correlation')
is.valid <- compcodeR:::check_compData_results(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData result object.')
saveRDS(cdata, '/Users/marinetognia/InterspeciesDEMethods/code/2023-05-16_2023-05-16_simulations_chen2019_results/real_tree_rodents_BM_with_lengths_3_0.8_1_1_phylolimma.TPM.log2.OUfixedRoot.with_trend.regCor_FALSE.rds')
print(paste('Unique data set ID:', compcodeR:::info.parameters(cdata)$uID))
table(cdata@result.table$adjpvalue<0.05)
#Donnees incluant les gènes différentiellement exprimés pour la méthode phylolimma.OUfixedRoot.with_trend.regCor_FALSE
# data_csv <- "data_phylolimma.OUfixedRoot.with_trend.regCor_FALSE.csv"
# df <- data.frame(Genes = rownames(cdata@result.table)[cdata@result.table$adjpvalue<0.05] )
# write.csv(df, file = data_csv, row.names = FALSE)
# file.rename(from = "/Users/marinetognia/InterspeciesDEMethods/code/data_phylolimma.OUfixedRoot.with_trend.regCor_FALSE.csv", to = "/Users/marinetognia/InterspeciesDEMethods/code/data_real/data_phylolimma.OUfixedRoot.with_trend.regCor_FALSE.csv")
```
Il y a 1736 gènes différentiellement exprimés pour la méthode phylolimma.OUfixedRoot.with_trend.regCor_FALSE
```{r}
#Méthode Phylolimma (Paramètres : with trend, regCor True, OU)
require(phylolimma)
require(edgeR)
library(here)
cdata <- readRDS(here("data", "chen2019_rodents_cpd.rds"))
is.valid <- compcodeR:::check_phyloCompData(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData object.')
# Design
design_formula <- as.formula(~ condition)
design_data <- compcodeR:::sample.annotations(cdata)[, 'condition', drop = FALSE]
design_data$condition <- factor(design_data$condition)
design <- model.matrix(design_formula, design_data)
# Normalisation
nf <- edgeR::calcNormFactors(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata), method = 'TMM')
data.trans <- phylolimma::lengthNormalizeRNASeq(compcodeR:::count.matrix(cdata), compcodeR:::length.matrix(cdata), normalisationFactor = nf, lengthNormalization = 'TPM', dataTransformation = 'log2')
# Fit
tree <- compcodeR:::getTree(cdata)
length.fitlimma <- phylolimma::phylolmFit(data.trans, design = design, phy = tree, model = 'OUfixedRoot', measurement_error = TRUE, use_consensus = TRUE,)
length.fitbayes <- limma::eBayes(length.fitlimma, trend = TRUE)
length.pvalues <- length.fitbayes$p.value[, ncol(length.fitbayes$p.value)]
length.logFC <- length.fitbayes$coefficients[, ncol(length.fitbayes$coefficients)]
length.adjpvalues <- p.adjust(length.pvalues, method = 'BH')
length.score <- 1 - length.pvalues
result.table <- data.frame('pvalue' = length.pvalues, 'adjpvalue' = length.adjpvalues, 'logFC' = length.logFC, 'score' = length.score)
rownames(result.table) <- rownames(compcodeR:::count.matrix(cdata))
compcodeR:::result.table(cdata) <- result.table
compcodeR:::package.version(cdata) <- paste('phylolimma,', packageVersion('phylolimma'), ';', 'edgeR,', packageVersion('edgeR'))
compcodeR:::analysis.date(cdata) <- date()
compcodeR:::method.names(cdata) <- list('short.name' = 'phylolimma', 'full.name' = 'phylolimma0.1.03.38.4.TMM.OUfixedRoot.me.lengthNorm.TPM.dataTrans.log2.moderation.eBayes.with_trend.regularized_correlation')
is.valid <- compcodeR:::check_compData_results(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData result object.')
saveRDS(cdata, '/Users/marinetognia/InterspeciesDEMethods/code/2023-05-16_2023-05-16_simulations_chen2019_results/real_tree_rodents_BM_with_lengths_3_0.8_1_1_phylolimma.TPM.log2.OUfixedRoot.with_trend.regCor_TRUE.rds')
print(paste('Unique data set ID:', compcodeR:::info.parameters(cdata)$uID))
table(cdata@result.table$adjpvalue<0.05)
#Donnees incluant les gènes différentiellement exprimés pour la méthode phylolimma.OUfixedRoot.with_trend.regCor_TRUE
# data_csv <- "data_phylolimma.OUfixedRoot.with_trend.regCor_TRUE.csv"
# df <- data.frame(Genes = rownames(cdata@result.table)[cdata@result.table$adjpvalue<0.05] )
# write.csv(df, file = data_csv, row.names = FALSE)
# file.rename(from = "/Users/marinetognia/InterspeciesDEMethods/code/data_phylolimma.OUfixedRoot.with_trend.regCor_TRUE.csv", to = "/Users/marinetognia/InterspeciesDEMethods/code/data_real/data_phylolimma.OUfixedRoot.with_trend.regCor_TRUE.csv")
```
Il y a 1753 gènes différentiellement exprimés
```{r}
#Méthode Phylolm (Paramètres : BM)
require(limma)
require(edgeR)
library(here)
cdata <- readRDS(here("data", "chen2019_rodents_cpd.rds"))
is.valid <- compcodeR:::check_phyloCompData(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData object.')
# Design
design_formula <- as.formula(~ condition)
design_data <- compcodeR:::sample.annotations(cdata)[, 'condition', drop = FALSE]
design_data$condition <- factor(design_data$condition)
design <- model.matrix(design_formula, design_data)
# Normalisation
nf <- edgeR::calcNormFactors(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata), method = 'TMM')
lib.size <- colSums(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata)) * nf
data.norm <- sweep((compcodeR:::count.matrix(cdata) + 0.5) / compcodeR:::length.matrix(cdata), 2, lib.size + 1, '/')
data.norm <- data.norm * 1e6
# Transformation
data.trans <- log2(data.norm)
rownames(data.trans) <- rownames(compcodeR:::count.matrix(cdata))
# Wrapper functions
extract_results_phylolm <- function (phylo_lm_obj)
{
res <- as.data.frame(summary(phylo_lm_obj)$coefficients)
result.table <- data.frame(pvalue = res["condition2", "p.value"],
logFC = res["condition2", "Estimate"], score = 1 - res["condition2",
"p.value"])
return(result.table)
}
phylolm_analysis <- function (dat, design_data, design_formula, tree, model, measurement_error,
upper.bound = NULL, lower.bound = NULL, starting.value = NULL,
...)
{
data_reg <- design_data
data_reg$expr <- dat
levels(data_reg$condition) <- c(1, 2)
res <- try(extract_results_phylolm(suppressWarnings(phylolm::phylolm(paste("expr",
paste(as.character(design_formula), collapse = "")),
data = data_reg, phy = tree, model = model, measurement_error = measurement_error,
upper.bound = upper.bound, starting.value = starting.value,
lower.bound = lower.bound, ...))), silent = TRUE)
if (inherits(res, "try-error")) {
if (model == "BM" && measurement_error) {
res <- try(extract_results_phylolm(phylolm::phylolm(paste("expr",
paste(as.character(design_formula), collapse = "")),
data = data_reg, phy = tree, model = "lambda",
measurement_error = FALSE, upper.bound = upper.bound,
starting.value = starting.value, lower.bound = lower.bound,
...)), silent = TRUE)
}
else {
h_tree <- max(ape::node.depth.edgelength(tree))
res <- try(extract_results_phylolm(suppressWarnings(phylolm::phylolm(paste("expr",
paste(as.character(design_formula), collapse = "")),
data = data_reg, phy = tree, model = model, measurement_error = measurement_error,
upper.bound = list(alpha = log(2)/0.1/h_tree),
starting.value = list(alpha = log(2)/1/h_tree),
lower.bound = list(alpha = log(2)/10/h_tree),
...))), silent = TRUE)
}
}
if (inherits(res, "try-error")) {
res <- data.frame(pvalue = 1, logFC = 0, score = 0)
warning(paste0("A gene produced an error."))
}
return(res)
}
# Analysis
tree <-compcodeR::: getTree(cdata)
phylolm.results_list <- apply(data.trans, 1, phylolm_analysis, design_data = design_data, design_formula = design_formula, tree = tree, model = 'BM', measurement_error = TRUE, lower.bound = list(sigma2_error = min_sigma2_error), upper.bound = list(lambda = 1/(1 + min_sigma2_error)))
result.table <- do.call(rbind, phylolm.results_list)
result.table$adjpvalue <- p.adjust(result.table$pvalue, 'BH')
# Save the results
rownames(result.table) <- rownames(compcodeR:::count.matrix(cdata))
compcodeR:::result.table(cdata) <- result.table
compcodeR:::package.version(cdata) <- paste('phylolm,', packageVersion('phylolm'))
compcodeR:::package.version(cdata) <- paste('limma,', packageVersion('limma'))
compcodeR:::analysis.date(cdata) <- date()
compcodeR:::method.names(cdata) <- list('short.name' = 'phylolm', 'full.name' = 'phylolm3.54.22.6.4.TMM.BM.me.lengthNorm.TPM.dataTrans.log2')
is.valid <- compcodeR:::check_compData_results(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData result object.')
saveRDS(cdata, '/Users/marinetognia/InterspeciesDEMethods/code/2023-05-16_2023-05-16_simulations_chen2019_results/real_tree_rodents_BM_with_lengths_3_0.8_1_1_phylolm.TPM.log2.BM.rds')
print(paste('Unique data set ID:', compcodeR:::info.parameters(cdata)$uID))
table(cdata@result.table$adjpvalue<0.05)
#Donnees incluant les gènes différentiellement exprimés pour la méthode phylolm_BM
# data_csv <- "data_phylolm_BM.csv"
# df <- data.frame(Genes = rownames(cdata@result.table)[cdata@result.table$adjpvalue<0.05] )
# write.csv(df, file = data_csv, row.names = FALSE)
# file.rename(from = "/Users/marinetognia/InterspeciesDEMethods/code/data_phylolm_BM.csv", to = "/Users/marinetognia/InterspeciesDEMethods/code/data_real/data_phylolm_BM.csv")
```
Il n'y a pas de gène différentiellement exprimés avec la méthode phylolm_BM
```{r}
#Méthode Phylolm (Pamamètres : OU)
require(phylolm)
require(limma)
require(edgeR)
library(here)
cdata <- readRDS(here("data", "chen2019_rodents_cpd.rds"))
is.valid <- compcodeR:::check_phyloCompData(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData object.')
# Design
design_formula <- as.formula(~ condition)
design_data <- compcodeR:::sample.annotations(cdata)[, 'condition', drop = FALSE]
design_data$condition <- factor(design_data$condition)
design <- model.matrix(design_formula, design_data)
# Normalisation
nf <- edgeR::calcNormFactors(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata), method = 'TMM')
lib.size <- colSums(compcodeR:::count.matrix(cdata) / compcodeR:::length.matrix(cdata)) * nf
data.norm <- sweep((compcodeR:::count.matrix(cdata) + 0.5) / compcodeR:::length.matrix(cdata), 2, lib.size + 1, '/')
data.norm <- data.norm * 1e6
# Transformation
data.trans <- log2(data.norm)
rownames(data.trans) <- rownames(compcodeR:::count.matrix(cdata))
# Wrapper functions
extract_results_phylolm <- function (phylo_lm_obj)
{
res <- as.data.frame(summary(phylo_lm_obj)$coefficients)
result.table <- data.frame(pvalue = res["condition2", "p.value"],
logFC = res["condition2", "Estimate"], score = 1 - res["condition2",
"p.value"])
return(result.table)
}
phylolm_analysis <- function (dat, design_data, design_formula, tree, model, measurement_error,
upper.bound = NULL, lower.bound = NULL, starting.value = NULL,
...)
{
data_reg <- design_data
data_reg$expr <- dat
levels(data_reg$condition) <- c(1, 2)
res <- try(extract_results_phylolm(suppressWarnings(phylolm::phylolm(paste("expr",
paste(as.character(design_formula), collapse = "")),
data = data_reg, phy = tree, model = model, measurement_error = measurement_error,
upper.bound = upper.bound, starting.value = starting.value,
lower.bound = lower.bound, ...))), silent = TRUE)
if (inherits(res, "try-error")) {
if (model == "BM" && measurement_error) {
res <- try(extract_results_phylolm(phylolm::phylolm(paste("expr",
paste(as.character(design_formula), collapse = "")),
data = data_reg, phy = tree, model = "lambda",
measurement_error = FALSE, upper.bound = upper.bound,
starting.value = starting.value, lower.bound = lower.bound,
...)), silent = TRUE)
}
else {
h_tree <- max(ape::node.depth.edgelength(tree))
res <- try(extract_results_phylolm(suppressWarnings(phylolm::phylolm(paste("expr",
paste(as.character(design_formula), collapse = "")),
data = data_reg, phy = tree, model = model, measurement_error = measurement_error,
upper.bound = list(alpha = log(2)/0.1/h_tree),
starting.value = list(alpha = log(2)/1/h_tree),
lower.bound = list(alpha = log(2)/10/h_tree),
...))), silent = TRUE)
}
}
if (inherits(res, "try-error")) {
res <- data.frame(pvalue = 1, logFC = 0, score = 0)
warning(paste0("A gene produced an error."))
}
return(res)
}
# Analysis
tree <- compcodeR:::getTree(cdata)
phylolm.results_list <- apply(data.trans, 1, phylolm_analysis, design_data = design_data, design_formula = design_formula, tree = tree, model = 'OUfixedRoot', measurement_error = TRUE, lower.bound = list(sigma2_error = min_sigma2_error), upper.bound = list(lambda = 1/(1 + min_sigma2_error)))
result.table <- do.call(rbind, phylolm.results_list)
result.table$adjpvalue <- p.adjust(result.table$pvalue, 'BH')
# Save the results
rownames(result.table) <- rownames(compcodeR:::count.matrix(cdata))
compcodeR:::result.table(cdata) <- result.table
compcodeR:::package.version(cdata) <- paste('phylolm,', packageVersion('phylolm'))
compcodeR:::package.version(cdata) <- paste('limma,', packageVersion('limma'))
compcodeR:::analysis.date(cdata) <- date()
compcodeR:::method.names(cdata) <- list('short.name' = 'phylolm', 'full.name' = 'phylolm3.54.22.6.4.TMM.OUfixedRoot.me.lengthNorm.TPM.dataTrans.log2')
is.valid <- compcodeR:::check_compData_results(cdata)
if (!(is.valid == TRUE)) stop('Not a valid phyloCompData result object.')
saveRDS(cdata, '/Users/marinetognia/InterspeciesDEMethods/code/2023-05-16_2023-05-16_simulations_chen2019_results/real_tree_rodents_BM_with_lengths_3_0.8_1_1_phylolm.TPM.log2.OUfixedRoot.rds')
print(paste('Unique data set ID:', compcodeR:::info.parameters(cdata)$uID))
table(cdata@result.table$adjpvalue<0.05)
#Donnees incluant les gènes différentiellement exprimés pour la méthode phylolm_OUfixedRoot
# data_csv <- "data_phylolm_OUfixedRoot.csv"
# df <- data.frame(Genes = rownames(cdata@result.table)[cdata@result.table$adjpvalue<0.05] )
# write.csv(df, file = data_csv, row.names = FALSE)
# file.rename(from = "/Users/marinetognia/InterspeciesDEMethods/code/data_phylolm_OUfixedRoot.csv", to = "/Users/marinetognia/InterspeciesDEMethods/code/data_real/data_phylolm_OUfixedRoot.csv")
```
Il y a 649 gènes différentiellement exprimés pour la méthode phylolm_OUfixedRoot.
```{r}
library(UpSetR)
library(here)
#Données evemodel
data1 <- read.csv(here("data_real","evemodel_emp.csv"))
data2 <- read.csv(here("data_real","data_evemodel.csv"))
data15 <- read.csv(here("data_real","data_evemodel_emp_primates.csv"))
data16 <- read.csv(here("data_real","data_evemodel_primates.csv"))
#Données phylolm
data3 <- read.csv(here("data_real","data_phylolm_BM.csv"))
data4 <- read.csv(here("data_real","data_phylolm_OUfixedRoot.csv"))
data17 <- read.csv(here("data_real","data_phylolm_BM_primates.csv"))
data18 <- read.csv(here("data_real","data_phylolm_OUfixedRoot_primates.csv"))
#Données limma
data5 <- read.csv(here("data_real","data_limma.no_trend.no_blocks.csv"))
data6 <- read.csv(here("data_real","data_limma.no_trend.with_blocks.csv"))
data19 <- read.csv(here("data_real","data_limma.no_trend.no_blocks_primates.csv"))
data20 <- read.csv(here("data_real","data_limma.no_trend.with_blocks_primates.csv"))
#data7 <- read.csv(here("data_real","data_limma.with_trend.no_blocks.csv"))
#data8 <- read.csv(here("data_real", "data_limma.with_trend.with_blocks.csv"))
#Données Phylolimma
#data9 <- read.csv(here("data_real", "data_phylolimma.no_trend.regCor_FALSE.csv"))
data10 <- read.csv(here("data_real","data_phylolimma.no_trend.regCor_TRUE.csv"))
data21 <- read.csv(here("data_real","data_phylolimma.no_trend.regCor_TRUE_primates.csv"))
#data11 <- read.csv(here("data_real","data_phylolimma.with_trend.regCor_FALSE.csv"))
#data12 <- read.csv(here("data_real","data_phylolimma.with_trend.regCor_TRUE.csv"))
#data13 <- read.csv(here("data_real","data_phylolimma.OUfixedRoot.no_trend.regCor_FALSE.csv"))
data14 <- read.csv(here("data_real","data_phylolimma.OUfixedRoot.no_trend.regCor_TRUE.csv"))
data22 <- read.csv(here("data_real","data_phylolimma.OUfixedRoot.no_trend.regCor_TRUE_primates.csv"))
#data15 <- read.csv(here("data_real","data_phylolimma.OUfixedRoot.with_trend.regCor_FALSE.csv"))
#data16 <- read.csv(here("data_real","data_phylolimma.OUfixedRoot.with_trend.regCor_TRUE.csv"))
# Créer un objet upSet à partir des données rodents
x = list (
evemodel_emp = data1$Genes,
evemodel = data2$Genes,
phylolm_OU = data4$Genes,
limma.no_trend.no_blocks = data5$Genes,
limma.no_trend.with_blocks = data6$Genes,
#limma.with_trend.no_blocks = filtered_data7,
#limma.with_trend.with_blocks = filtered_data8,
#phylolimma.BM.no_trend.regCor_FALSE = filtered_data_9,
phylolimma.BM.no_trend.regCor_TRUE = data10$Genes,
#phylolimma.BM.with_trend.regCor_FALSE = filtered_data_11,
#phylolimma.BM.with_trend.regCor_TRUE = filtered_data_12,
#phylolimma.OU.no_trend.regCor_FALSE = filtered_data_13,
phylolimma.OU.no_trend.regCor_TRUE = data14$Genes
#phylolimma.OU.with_trend.regCor_FALSE = filtered_data_15,
#phylolimma.OU.with_trend.regCor_TRUE = filtered_data_16
)
# Créer un objet upSet à partir des données primates
x1 = list (
evemodel_emp = data15$Genes,
evemodel = data16$Genes,
phylolm_OU = data18$Genes,
limma.no_trend.no_blocks = data19$Genes,
limma.no_trend.with_blocks = data20$Genes,
phylolimma.BM.no_trend.regCor_TRUE = data21$Genes,
phylolimma.OU.no_trend.regCor_TRUE = data22$Genes
)
#graphique UpSet données rodents
upset(fromList(x), nsets = 16, point.size = 1.2,line.size = 0.5, text.scale = 0.6, order.by = "freq")
#graphique UpSet données primates
upset(fromList(x1), nsets = 10, point.size = 1.2,line.size = 0.5, text.scale = 0.6, order.by = "freq")
```