Working sequential LBM for a given Q
This commit is contained in:
parent
3095f64b99
commit
14adda1a36
1 changed files with 71 additions and 41 deletions
|
|
@ -6,15 +6,7 @@ library(stringr)
|
|||
library(sbm)
|
||||
library(blockmodels)
|
||||
|
||||
the_data <- import_biom("data/chaillou/chaillou.biom")
|
||||
|
||||
per_taxa_network <- collapse_otu_at_taxo(the_data)
|
||||
|
||||
# Detect if first rank (higher) is fully collapsed
|
||||
if (nrow(per_taxa_network[[1]]) <= 1L) {
|
||||
message("The first network has only one row. Removing it.")
|
||||
per_taxa_network <- per_taxa_network[-1]
|
||||
}
|
||||
the_data <- import_biom("data/mach/kinetic.biom")
|
||||
|
||||
# fit_res <- lapply(per_taxa_network, function(network) {
|
||||
# withTimeout(
|
||||
|
|
@ -31,42 +23,80 @@ if (nrow(per_taxa_network[[1]]) <= 1L) {
|
|||
# return(fit)
|
||||
# })
|
||||
|
||||
model <- BM_poisson(
|
||||
bm_propagate_taus_max_ICL <- function(phyloseq_data) {
|
||||
# Prepare
|
||||
per_taxa_network <- collapse_otu_at_taxo(phyloseq_data)
|
||||
|
||||
removed_root <- FALSE
|
||||
# Detect if first rank (higher) is fully collapsed
|
||||
if (nrow(per_taxa_network[[1]]) <= 1L) {
|
||||
message("The first network has only one row. Removing it.")
|
||||
removed_root <- TRUE
|
||||
per_taxa_network <- per_taxa_network[-1]
|
||||
}
|
||||
result_list <- list()
|
||||
# Init
|
||||
mean_diff_vec <- c()
|
||||
PL_per_nb_vec <- c()
|
||||
max_rank_id <- tax_table(phyloseq_data) |> ncol()
|
||||
rank_id <- 2
|
||||
net_id <- ifelse(removed_root, rank_id - 1, rank_id)
|
||||
model <- BM_poisson(
|
||||
membership_type = "LBM",
|
||||
adj = per_taxa_network[[1]],
|
||||
verbosity = 0,
|
||||
adj = per_taxa_network[[net_id]], # Account for the root
|
||||
verbosity = 2,
|
||||
plotting = "",
|
||||
ncores = 1L
|
||||
)
|
||||
model$estimate()
|
||||
# Here we extract the memberships
|
||||
best_model_memberships <- model$memberships[[which.max(model$ICL)]]
|
||||
next_cc <- list(Z1 = best_model_memberships$Z1, Z2 = best_model_memberships$Z2)
|
||||
)
|
||||
model$estimate()
|
||||
result_list <- append(result_list, model)
|
||||
# Here we extract the memberships
|
||||
best_model_memberships <- model$memberships[[which.max(model$ICL)]]
|
||||
next_cc <- list(Z1 = best_model_memberships$Z1, Z2 = best_model_memberships$Z2)
|
||||
|
||||
# Here we dispatch the memberships for the OTU
|
||||
## Propagate the rownames
|
||||
rank_id <- 3
|
||||
# Here we dispatch the memberships for the OTU
|
||||
## Propagate the rownames
|
||||
while (rank_id < max_rank_id) {
|
||||
rownames(next_cc$Z1) <- rownames(per_taxa_network[[net_id]])
|
||||
rank_id <- rank_id + 1
|
||||
net_id <- ifelse(removed_root, rank_id - 1, rank_id)
|
||||
next_Z1 <- propagate_taus(tau_matrix = next_cc$Z1, physeq = phyloseq_data, taxrank = phyloseq::rank_names(phyloseq_data)[rank_id])
|
||||
|
||||
## Noising of the next_Z1
|
||||
dZ1 <- dim(next_Z1)
|
||||
nZ1 <- dZ1[1] * dZ1[2]
|
||||
next_Z1 <- next_Z1 + matrix(rnorm(n = nZ1, mean = 0, sd = 0.01), nrow = dZ1[1], ncol = dZ1[2])
|
||||
|
||||
## Normalizing by rows
|
||||
C_Z1 <- rowSums(next_Z1)
|
||||
next_Z1 <- next_Z1 / C_Z1
|
||||
|
||||
next_cc$Z1 <- next_Z1
|
||||
|
||||
|
||||
rownames(next_cc$Z1) <- rownames(per_taxa_network[[rank_id - 2]])
|
||||
next_Z1 <- propagate_taus(tau_matrix = next_cc$Z1, physeq = the_data, taxrank = phyloseq::rank_names(the_data)[rank_id])
|
||||
|
||||
## Noising of the next_Z1
|
||||
dZ1 <- dim(next_Z1)
|
||||
nZ1 <- dZ1[1] * dZ1[2]
|
||||
next_Z1 <- next_Z1 + matrix(rnorm(n = nZ1, mean = 0, sd = 0.001), nrow = dZ1[1], ncol = dZ1[2])
|
||||
|
||||
## Normalizing by rows
|
||||
C_Z1 <- rowSums(next_Z1)
|
||||
next_Z1 <- next_Z1 / C_Z1
|
||||
|
||||
next_cc$Z1 <- next_Z1
|
||||
|
||||
next_res <- blockmodels:::dispatcher(
|
||||
next_res <- blockmodels:::dispatcher(
|
||||
membership_name = "LBM",
|
||||
membership_init = next_cc,
|
||||
model_name = "poisson",
|
||||
network = list(adjacency = per_taxa_network[[2]]), real_EM = TRUE
|
||||
)
|
||||
next_cc <- next_res[["membership"]][c("Z1", "Z2")]
|
||||
rank_id <- rank_id + 1
|
||||
network = list(adjacency = per_taxa_network[[net_id]]), real_EM = TRUE
|
||||
) # account for the removed root
|
||||
result_list <- append(result_list, next_res)
|
||||
mean_diff_vec[paste0(rank_id - 1, "->", rank_id)] <- mean((next_res$membership$Z1 - next_Z1)^2)
|
||||
cat("Mean diff across ranks:\n")
|
||||
print(sqrt(mean_diff_vec))
|
||||
next_cc <- next_res[["membership"]][c("Z1", "Z2")]
|
||||
PL_per_nb_vec[paste0(rank_id - 1, "->", rank_id)] <- next_res$PL / (nrow(next_res$membership$Z1) * nrow(next_res$membership$Z2))
|
||||
cat("PL/nZ1*nZ2: \n")
|
||||
print(PL_per_nb_vec)
|
||||
}
|
||||
return(result_list)
|
||||
}
|
||||
|
||||
mach_out <- bm_propagate_taus_max_ICL(phyloseq_data = the_data)
|
||||
|
||||
chaillou_out <- bm_propagate_taus_max_ICL(phyloseq_data = import_biom("data/chaillou/chaillou.biom"))
|
||||
|
||||
ravel_out <- bm_propagate_taus_max_ICL(phyloseq_data = import_biom("data/ravel/ravel.biom"))
|
||||
|
||||
# To init a BM model I need to provide memberships and ICL
|
||||
# And to compute with the dispatcher the previous values to init the models with ICL and memberships
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue