diff --git a/code/for_report/inference_analyze.R b/code/for_report/inference_analyze.R index fa39496..dac8d12 100644 --- a/code/for_report/inference_analyze.R +++ b/code/for_report/inference_analyze.R @@ -9,6 +9,7 @@ library("kableExtra") library("stringr") library("here") library("tikzDevice") +options(tikzDocumentDeclaration = "\\documentclass[10pt]{standalone}") meanse <- function(x, ...) { @@ -145,7 +146,7 @@ for (model in c("sep", "iid", "pi", "rho", "pirho")) { caption = paste0( "\\label{subtab:ari_per_model_", model, "}Quality metrics for ", - ifelse(model != "sep", paste0(model_name, "$\\text{-}colBiSBM$"), "$sep\\text{-}BiSBM$") + ifelse(model != "sep", paste0(model_name, "$\\text{-colBiSBM}$"), "$sep\\text{-BiSBM}$") ), col.names = kable_ari_colnames, format = "latex" @@ -159,7 +160,7 @@ for (model in c("sep", "iid", "pi", "rho", "pirho")) { caption = paste0( "\\label{subtab:blocrecov_per_model_", model, "}Bloc recovery for ", - ifelse(model != "sep", paste0(model_name, "$\\text{-}colBiSBM$"), "$sep\\text{-}BiSBM$") + ifelse(model != "sep", paste0(model_name, "$\\text{-colBiSBM}$"), "$sep\\text{-BiSBM}$") ), col.names = kable_blocrecov_colnames, format = "latex" @@ -173,7 +174,7 @@ for (model in c("sep", "iid", "pi", "rho", "pirho")) { gsub("\\begin{table}", "\\begin{subtable}{\\textwidth}", x = _, fixed = TRUE) |> gsub("\\end{table}", "\\end{subtable}", x = _, fixed = TRUE) cat("", - "\\begin{table}[!htb]", + "\\begin{table}[H]", "\\centering", paste0("\\caption{\\label{tab:inference_results_", model, "}Inference results for ", model_name, "}"), both_kables, @@ -239,10 +240,11 @@ if (!dir.exists(output_tikz_folder)) { tikz( file = file.path(output_tikz_folder, "model-proportions.tex"), width = 4L, - height = 3L + height = 3L, + standAlone = TRUE ) levels(proportion_preferred_data$preferred_model) <- c( - "sep", "iid", "$\\pi$", "$\\rho$", + "sep", "$iid$", "$\\pi$", "$\\rho$", "$\\pi\\rho$" ) plot <- proportion_preferred_data |> @@ -261,7 +263,11 @@ plot <- proportion_preferred_data |> xlab("$\\epsilon_{\\alpha}$") + ylab("Preferred model proportions") + theme_minimal() + - theme(aspect.ratio = 1L, axis.text.x = element_text(angle = -45, vjust = .5, hjust = 0)) + + theme( + aspect.ratio = 1L, + axis.text.x = element_text(angle = -45, vjust = .5, hjust = 0), + axis.text.y = element_text(size = 6) + ) + geom_col(position = "stack") print(plot) dev.off() @@ -289,7 +295,7 @@ ARI_type.labs <- c("$\\overline{\\mbox{ARI}}_d$", "$\\mbox{ARI}_d$") names(ARI_type.labs) <- c("mean", "double") levels(averaged_data$model) <- c( - "sep", "iid", "$\\pi$", "$\\rho$", + "sep", "$iid$", "$\\pi$", "$\\rho$", "$\\pi\\rho$" ) @@ -320,7 +326,8 @@ ARI_plots <- ggplot(averaged_data) + tikz( file = file.path(output_tikz_folder, "ari-plots.tex"), width = 6L, - height = 5L + height = 5L, + standAlone = TRUE ) print(ARI_plots) dev.off() diff --git a/code/for_report/model_selection_analyze.R b/code/for_report/model_selection_analyze.R index bd223b2..17f4d21 100644 --- a/code/for_report/model_selection_analyze.R +++ b/code/for_report/model_selection_analyze.R @@ -9,6 +9,7 @@ require("dplyr") require("here") require("ggh4x") require("tikzDevice") +options(tikzDocumentDeclaration = "\\documentclass[10pt]{standalone}") ## ----setup, echo = FALSE, include= FALSE---------------------------------------------------------------------------------------------------------------------------- @@ -101,15 +102,15 @@ table_df <- full_join(x = bloc_recovery_df, y = model_proportion_df) #| fig.height = 4, #| dpi=300 levels(model_comparison_eps_pi$preferred_model) <- c( - "sep", "iid", "$\\pi$", "$\\rho$", + "sep", "$iid$", "$\\pi$", "$\\rho$", "$\\pi\\rho$" ) levels(model_comparison_eps_rho$preferred_model) <- c( - "sep", "iid", "$\\pi$", "$\\rho$", + "sep", "$iid$", "$\\pi$", "$\\rho$", "$\\pi\\rho$" ) levels(model_comparison_eps_pi_rho$preferred_model) <- c( - "sep", "iid", "$\\pi$", "$\\rho$", + "$iid$", "$\\pi$", "$\\rho$", "$\\pi\\rho$" ) @@ -142,7 +143,8 @@ if (!dir.exists(output_tikz_folder)) { tikz( file = file.path(output_tikz_folder, "eps-pi-rho-preferred.tex"), width = 6, - height = 5 + height = 5, + standAlone = TRUE ) print(plot_pi_rho) dev.off() diff --git a/code/for_report/na_robustness_analyze.R b/code/for_report/na_robustness_analyze.R index a80b85c..4871b40 100644 --- a/code/for_report/na_robustness_analyze.R +++ b/code/for_report/na_robustness_analyze.R @@ -6,9 +6,11 @@ library("dplyr") library("stringr") library("knitr") library("kableExtra") +library("vctrs") library("stringr") library("here") library("tikzDevice") +options(tikzDocumentDeclaration = "\\documentclass[10pt]{standalone}") meanse <- function(x, ...) { @@ -45,41 +47,39 @@ results_df <- do.call(rbind, lapply(matching_filenames, readRDS)) |> auc_df <- results_df |> select(-contains(c("ari", "elapsed_secs"))) |> - mutate(auc_diff = auc_colBiSBM - auc_LBM) |> + pivot_longer( + cols = c(auc_LBM, auc_colBiSBM), + values_to = "auc", + names_prefix = "auc_", + names_to = "method" + ) |> + mutate(model = ifelse(method == "LBM", paste0("sep-", model), model)) |> mutate_at(vars(model, struct), as.factor) |> mutate(model = forcats::fct_relevel( model, - "iid", "pi", "rho", "pirho" - )) -# pivot_longer( -# cols = c(auc_LBM, auc_colBiSBM), -# values_to = "auc", -# names_prefix = "auc_", -# names_to = "method" -# ) |> + "iid", "sep-iid", "pi", "sep-pi", "rho", "sep-rho", "pirho", "sep-pirho" + )) |> + filter(prop_NAs <= 0.7) -auc_df_summarised <- auc_df |> - group_by(prop_NAs, model, struct) |> - summarise_at(vars(auc_diff), list("auc_mean" = mean, "auc_sd" = sd)) - -levels(auc_df$model) <- c( - "$iid$", "$\\pi$", "$\\rho$", - "$\\pi\\rho$" -) +levels(auc_df[["model"]]) <- levels(auc_df[["model"]]) |> + str_replace(fixed("iid"), "$iid$") |> + str_replace(fixed("pirho"), "$\\pi\\rho$") |> + str_replace("pi$", "$\\\\pi$") |> + str_replace("rho$", "$\\\\rho$") auc_plot <- ggplot(auc_df) + - aes(x = factor(prop_NAs), y = auc_diff, fill = model) + + aes(x = factor(prop_NAs), y = auc, fill = model, color = model) + geom_boxplot(notch = TRUE) + - geom_hline(yintercept = 0, color = "red", linetype = "dashed") + - labs(x = "$p_{\\texttt{NA}}$", y = "$\\Delta\\mbox{AUC}$") + - scale_fill_okabe_ito(order = 2:9) + + labs(x = "$p_{\\texttt{NA}}$", y = "AUC") + + scale_fill_okabe_ito(order = vec_interleave(seq(2, 5), seq(6, 9)), alpha = 0.5) + + scale_color_okabe_ito(order = vec_interleave(seq(2, 5), seq(6, 9))) + guides( fill = guide_legend(title = "Model"), color = guide_legend(title = "Model") ) + facet_grid(vars(struct)) + theme_minimal() - +auc_plot output_tikz_folder <- here( "mia-rapport-2024", "tikz", "simulations", "na_robustness" @@ -89,56 +89,57 @@ if (!dir.exists(output_tikz_folder)) { } tikz( - file = file.path(output_tikz_folder, "auc-model.tex"), width = 5L, - height = 2.5 + file = file.path(output_tikz_folder, "auc-model.tex"), width = 6.5, + height = 3.5, + standAlone = TRUE ) print(auc_plot) dev.off() ## ARI plots ari_df <- results_df |> select(-contains(c("auc", "elapsed_secs"))) |> + group_by(prop_NAs, model) |> + pivot_longer( + cols = c(arirow_LBM:aricol_colBiSBM), + values_to = "ari", names_to = c("dim", "method"), + names_pattern = "(row|col)_(LBM|colBiSBM)" + ) |> + mutate(model = ifelse(method == "LBM", paste0("sep-", model), model)) |> mutate_at(vars(model, struct), as.factor) |> mutate(model = forcats::fct_relevel( model, - "iid", "pi", "rho", "pirho" + "iid", "sep-iid", "pi", "sep-pi", "rho", "sep-rho", "pirho", "sep-pirho" )) |> - group_by(prop_NAs, model) |> - mutate( - arirow_diff = arirow_colBiSBM - arirow_LBM, - aricol_diff = aricol_colBiSBM - aricol_LBM - ) |> - select(-c(arirow_LBM:aricol_colBiSBM)) |> - pivot_longer( - cols = c(arirow_diff:aricol_diff), - values_to = "ari_diff", names_to = c("dim"), - names_pattern = "(row|col)" - ) + filter(prop_NAs <= 0.7) -dim.labs <- c("$d = 1$", "$d = 2$") +levels(ari_df[["model"]]) <- levels(ari_df[["model"]]) |> + str_replace(fixed("iid"), "$iid$") |> + str_replace(fixed("pirho"), "$\\pi\\rho$") |> + str_replace("pi$", "$\\\\pi$") |> + str_replace("rho$", "$\\\\rho$") + + +dim.labs <- c("Rows : $d = 1$", "Columns : $d = 2$") names(dim.labs) <- c("row", "col") -levels(ari_df$model) <- c( - "$iid$", "$\\pi$", "$\\rho$", - "$\\pi\\rho$" -) - ari_plot <- ggplot(ari_df) + - aes(x = factor(prop_NAs), y = ari_diff, fill = model) + + aes(x = factor(prop_NAs), y = ari, fill = model, color = model) + geom_boxplot(notch = TRUE) + - geom_hline(yintercept = 0, color = "red", linetype = "dashed") + - labs(x = "$p_{\\texttt{NA}}$", y = "$\\Delta\\mbox{ARI}$") + + labs(x = "$p_{\\texttt{NA}}$", y = "ARI") + guides( fill = guide_legend(title = "Model"), color = guide_legend(title = "Model") ) + - scale_fill_okabe_ito(order = 2:9) + + scale_fill_okabe_ito(order = vec_interleave(seq(2, 5), seq(6, 9)), alpha = 0.5) + + scale_color_okabe_ito(order = vec_interleave(seq(2, 5), seq(6, 9))) + facet_grid(struct ~ dim, labeller = labeller(dim = dim.labs)) + theme_minimal() + theme(axis.text.x = element_text(size = 5)) tikz( - file = file.path(output_tikz_folder, "ari-dim-model.tex"), width = 5.5, - height = 2.5 + file = file.path(output_tikz_folder, "ari-dim-model.tex"), width = 6.5, + height = 4, + standAlone = TRUE ) print(ari_plot) dev.off() diff --git a/code/for_report/network_clustering_analyze.R b/code/for_report/network_clustering_analyze.R index 3d1365e..c321cf1 100644 --- a/code/for_report/network_clustering_analyze.R +++ b/code/for_report/network_clustering_analyze.R @@ -9,6 +9,7 @@ library("kableExtra") library("stringr") library("here") library("tikzDevice") +options(tikzDocumentDeclaration = "\\documentclass[10pt]{standalone}") @@ -46,11 +47,14 @@ df_netclust$model <- df_netclust$model |> ari_plot <- ggplot(df_netclust) + aes(x = as.factor(epsilon), y = ARI) + scale_color_okabe_ito(order = 2L:9L) + - scale_fill_okabe_ito(order = 2L:9L) + + scale_fill_okabe_ito(order = 2L:9L, alpha = 0.5) + xlab("$\\epsilon$") + - guides(fill = guide_legend(title = "Model")) + + guides( + fill = guide_legend(title = "Model"), + color = guide_legend(title = "Model") + ) + ylab("ARI of the clustering") + - geom_boxplot(aes(fill = model), notch = TRUE) + + geom_boxplot(aes(fill = model, color = model), notch = TRUE) + theme_minimal() output_tikz_folder <- here( @@ -64,7 +68,8 @@ if (!dir.exists(output_tikz_folder)) { tikz( file = file.path(output_tikz_folder, "ari-clustering.tex"), width = 5L, - height = 3L + height = 3L, + standAlone = TRUE ) print(ari_plot) dev.off() diff --git a/mia-rapport-2024 b/mia-rapport-2024 index cc77bcc..fae7c80 160000 --- a/mia-rapport-2024 +++ b/mia-rapport-2024 @@ -1 +1 @@ -Subproject commit cc77bcc7fc6d75293ecf5791aad75d7d11be0287 +Subproject commit fae7c807e64a1ffb73b0f3b37ed44f5c4379aa84