# mia-stage-2024/code/applications/utils.R

#' Select the n most recent files
#'
#' @param data_folder The folder in which data files are located.
#' @param n The maximum number of files to return. Defaults to 4.
#' @param pattern An optional regular expression forwarded to `list.files()`;
#' only file names matching it are considered. Defaults to `NULL` (all files).
#'
#' @details Files are ranked by the `ctime` field reported by `file.info()`,
#' most recent first. Sub-directories of `data_folder` are ignored. A warning
#' is raised if `n` is larger than the number of candidate files, in which
#' case all of them are returned.
#'
#' @return A character vector of at most `n` file paths (each built with
#' `file.path(data_folder, <file name>)`), ordered from most to least recent.
get_recent_files <- function(data_folder, n = 4, pattern = NULL) {
    # List the folder once so paths and metadata always describe the same
    # set of entries, even if the folder changes while the function runs.
    filepaths <- file.path(
        data_folder,
        list.files(data_folder, pattern = pattern)
    )
    files_info <- file.info(filepaths)

    # A non-recursive list.files() always includes directories (its
    # `include.dirs` argument only affects recursive listings), so they are
    # dropped explicitly. Entries whose metadata is unavailable (NA) are
    # dropped as well.
    is_file <- !is.na(files_info[["isdir"]]) & !files_info[["isdir"]]
    filepaths <- filepaths[is_file]
    ctimes <- files_info[["ctime"]][is_file]

    filepaths <- filepaths[order(ctimes, decreasing = TRUE)]
    if (n > length(filepaths)) {
        warning(
            "n = ", n,
            " is too large ! It should be at most ",
            length(filepaths)
        )
    }
    return(head(filepaths, n = n))
}

#' Name file paths after the model label found in them
#'
#' @param files_vec A character vector, typically file paths.
#' @param pattern A regular expression. For each element of `files_vec`, the
#' first match found by `stringr::str_extract()` becomes that element's name.
#' Defaults to `"(iid|pirho|pi|rho)"`.
#'
#' @return `files_vec` with its names replaced by the extracted labels
#' (`NA` for elements in which `pattern` does not match).
identify_models <- function(files_vec, pattern = "(iid|pirho|pi|rho)") {
    model_labels <- stringr::str_extract(files_vec, pattern)
    setNames(files_vec, model_labels)
}

#' Build a data frame of network sizes from one or several collections
#'
#' @param collection_list Either a list of collections or a single collection
#' that is not itself a list. Each collection must expose the elements `M`,
#' `net_id`, `n` and `Q` through `[[`; `n` and `Q` must have at least two
#' elements each.
#'
#' @details A non-list input is treated as a single collection with
#' `collection_id` 1. For a list, `collection_id` is the position of the
#' collection in the list and the per-collection data frames are stacked
#' with `rbind()`.
#'
#' @return A data frame with columns `collection_id` (factor), `M`,
#' `net_id` (factor), `nr`, `nc` (first and second element of `n`) and
#' `Qr`, `Qc` (first and second element of `Q`). Presumably there is one
#' row per network of each collection; confirm against the collection
#' objects.
build_graph_size_dataframe <- function(collection_list) {
    # Shared constructor for the rows describing one collection.
    describe_collection <- function(collection, id) {
        sizes <- collection[["n"]]
        blocks <- collection[["Q"]]
        data.frame(
            collection_id = factor(id),
            M = collection[["M"]],
            net_id = factor(collection[["net_id"]]),
            nr = sizes[[1]],
            nc = sizes[[2]],
            Qr = blocks[[1]],
            Qc = blocks[[2]]
        )
    }

    # A bare (non-list) collection is described directly.
    if (!is.list(collection_list)) {
        return(describe_collection(collection_list, 1L))
    }

    per_collection <- lapply(seq_along(collection_list), function(idx) {
        describe_collection(collection_list[[idx]], idx)
    })
    do.call("rbind", per_collection)
}

#' Extract the collection membership of each network from a clustering
#'
#' @param clustering A clustering result, passed as argument `l` to
#' `colSBM::extract_best_partition()`.
#'
#' @details The best partition is retrieved with `unnest = TRUE`. If the
#' result is not a list it is wrapped in one, so that a single collection is
#' handled like a partition with one group. Each group must expose `M` and
#' `net_id` through `[[`; group `idx` contributes `M` entries equal to `idx`,
#' named by `net_id` (presumably `M` is the number of networks in the group;
#' confirm against colSBM).
#'
#' @return An integer vector of group indices named by `net_id`, with the
#' groups concatenated in partition order (`NULL` for an empty partition).
extract_clustering <- function(clustering) {
    partition <- colSBM::extract_best_partition(
        l = clustering,
        unnest = TRUE
    )
    if (!is.list(partition)) {
        partition <- list(partition)
    }
    # lapply() + unlist() always yields a flat, fully named vector. sapply()
    # would simplify groups of equal size (> 1) into a matrix whose row names
    # come from the first group only, losing the names of all other groups.
    memberships <- lapply(seq_along(partition), function(idx) {
        clust_vec <- rep(idx, partition[[idx]][["M"]])
        names(clust_vec) <- partition[[idx]][["net_id"]]
        clust_vec
    })
    return(unlist(memberships))
}