# human-microbiome-compendium/retrieve-data.R

# 52 lines
# 1.8 KiB
# R

# Bootstrap: make sure MicroBioMap is available before the library() calls
# below. It is installed from the blekhmanlab/MicroBioMap GitHub repository
# through BiocManager when it cannot be loaded.
#
# requireNamespace() is used for the check (as for BiocManager) instead of
# scanning installed.packages(), which enumerates the whole library and is
# not meant for testing whether a single package is present.
if (!requireNamespace("MicroBioMap", quietly = TRUE)) {
  cat("MicroBioMap package not found. Installing...\n")
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
  }
  # BiocManager hands "user/repo" specifications over to the remotes package
  # and fails when remotes is not installed, so provide it up front.
  if (!requireNamespace("remotes", quietly = TRUE)) {
    install.packages("remotes")
  }
  BiocManager::install("blekhmanlab/MicroBioMap")
}
library(MicroBioMap)
library(countrycode)
library(igraph)
library(here)
library(readr)
# Virtual S4 class that accepts either a plain matrix or a
# SummarizedExperiment.
# NOTE(review): "ExpData" is not referenced anywhere else in the visible
# script; presumably a dependency expects it to exist - confirm before
# removing.
setClassUnion("ExpData", members = c("matrix", "SummarizedExperiment"))

# Download the compendium and add a `country` column to its sample metadata,
# translated from the two-letter ISO code stored in `iso`.
cpd <- getCompendium()
sample_info <- colData(cpd)
sample_info$country <- countrycode(
  sample_info$iso,
  origin = "iso2c",
  destination = "country.name"
)
colData(cpd) <- sample_info

# Distinct project identifiers; the export steps below iterate over these.
projects <- unique(colData(cpd)$project)

# Output directory (<project root>/data), created on first run.
save_folder <- here("data")
if (!dir.exists(save_folder)) {
  dir.create(save_folder)
}
# Per-project export. For every project two gzip-named CSV files are written
# into `save_folder`:
#   <project>_edge_list.csv.gz  edge list of the graph built from the
#                               project's count matrix
#   <project>_supinfo.csv.gz    sample metadata (colData) of the project

# The project label of every sample does not change between iterations, so
# it is looked up once instead of on every pass through the loop.
project_of_sample <- colData(cpd)$project

for (project in projects) {
  cat("Processing project:", project, "\n")

  # Keep only the columns (samples) that belong to the current project.
  project_data <- cpd[, project_of_sample == project]
  matrix_data <- counts(project_data)

  # Interpret the count matrix as a biadjacency matrix: its rows and columns
  # become the two vertex sets of the graph.
  g <- graph_from_biadjacency_matrix(matrix_data)
  edge_list <- as_edgelist(g) |> as.data.frame()
  colnames(edge_list) <- c("source", "target")

  # `save_folder` already is the absolute path returned by here("data").
  # Join file names onto it with file.path() rather than feeding it through
  # here() a second time, which only works when the installed rprojroot
  # passes absolute paths through unchanged.
  edge_list_path <- file.path(
    save_folder,
    paste0(project, "_edge_list.csv.gz")
  )
  write_csv(edge_list, file = edge_list_path)

  supinfo_path <- file.path(
    save_folder,
    paste0(project, "_supinfo.csv.gz")
  )
  supinfo_df <- as.data.frame(colData(project_data))
  write_csv(supinfo_df, file = supinfo_path)
}
# Global export: a list holding one count matrix per project (named by
# project) saved as RDS, plus the sample metadata of the whole compendium
# saved as CSV.

# Looked up once; the closure below reuses it for every project.
project_of_sample <- colData(cpd)$project

list_matrices <- lapply(projects, function(project) {
  # Count matrix restricted to the samples of this project.
  counts(cpd[, project_of_sample == project])
})
names(list_matrices) <- projects

projects_supinfo <- as.data.frame(colData(cpd))

# `save_folder` already is the absolute path returned by here("data"), so
# file names are appended with file.path() instead of calling here() again
# on an absolute path.
write_csv(
  projects_supinfo,
  file = file.path(save_folder, "projects_supinfo.csv.gz")
)
saveRDS(list_matrices, file = file.path(save_folder, "list_matrices.rds"))