Profiling whether max_vem_steps matters: IT DOES

This commit is contained in:
Louis Lacoste 2024-06-18 16:57:59 +02:00
parent bf06493174
commit a3a44e02a6
3 changed files with 747 additions and 0 deletions

View file

@ -0,0 +1,515 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.4.554">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<title>Investigation sur l'impact de max_vem_steps</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
vertical-align: middle;
}
</style>
<script src="impact_of_vem_max_steps_files/libs/clipboard/clipboard.min.js"></script>
<script src="impact_of_vem_max_steps_files/libs/quarto-html/quarto.js"></script>
<script src="impact_of_vem_max_steps_files/libs/quarto-html/popper.min.js"></script>
<script src="impact_of_vem_max_steps_files/libs/quarto-html/tippy.umd.min.js"></script>
<script src="impact_of_vem_max_steps_files/libs/quarto-html/anchor.min.js"></script>
<link href="impact_of_vem_max_steps_files/libs/quarto-html/tippy.css" rel="stylesheet">
<link href="impact_of_vem_max_steps_files/libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="impact_of_vem_max_steps_files/libs/bootstrap/bootstrap.min.js"></script>
<link href="impact_of_vem_max_steps_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="impact_of_vem_max_steps_files/libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
<!-- polyfill.io changed ownership and served malicious code in June 2024;
     load the same ES6 polyfill bundle from the Cloudflare cdnjs mirror instead. -->
<script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script>
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.js" type="text/javascript"></script>
<script type="text/javascript">
// Typeset the math contained in `el` with whichever renderer is loaded on the
// page: MathJax is preferred; KaTeX is used only when MathJax is absent.
// Does nothing when neither library is present.
const typesetMath = (el) => {
if (window.MathJax) {
// MathJax Typeset
window.MathJax.typeset([el]);
} else if (window.katex) {
// KaTeX Render
// Every descendant carrying the "math" class is a candidate.
var mathElements = el.getElementsByClassName("math");
// The same `macros` object is handed to every render call below.
var macros = [];
for (var i = 0; i < mathElements.length; i++) {
// The TeX source is read from the first child node's `data`.
var texText = mathElements[i].firstChild;
// Only <span> elements are rendered; other tags with the class are skipped.
if (mathElements[i].tagName == "SPAN") {
window.katex.render(texText.data, mathElements[i], {
// Display mode is chosen from the element's "display" class.
displayMode: mathElements[i].classList.contains('display'),
throwOnError: false,
macros: macros,
fleqn: false
});
}
}
}
}
window.Quarto = {
typesetMath
};
</script>
</head>
<body class="fullcontent">
<div id="quarto-content" class="page-columns page-rows-contents page-layout-article">
<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title">Investigation sur l'impact de <code>max_vem_steps</code></h1>
</div>
<div class="quarto-title-meta">
</div>
</header>
<section id="générations-des-données" class="level2">
<h2 class="anchored" data-anchor-id="générations-des-données">Générations des données</h2>
<p>Générons les données avec une seed fixée (<span class="math inline">\(s_{net} = 0\)</span>)</p>
</section>
</main>
<!-- /main column -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
const toggleBodyColorMode = (bsSheetEl) => {
const mode = bsSheetEl.getAttribute("data-mode");
const bodyEl = window.document.querySelector("body");
if (mode === "dark") {
bodyEl.classList.add("quarto-dark");
bodyEl.classList.remove("quarto-light");
} else {
bodyEl.classList.add("quarto-light");
bodyEl.classList.remove("quarto-dark");
}
}
const toggleBodyColorPrimary = () => {
const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
if (bsSheetEl) {
toggleBodyColorMode(bsSheetEl);
}
}
toggleBodyColorPrimary();
const icon = "";
const anchorJS = new window.AnchorJS();
anchorJS.options = {
placement: 'right',
icon: icon
};
anchorJS.add('.anchored');
const isCodeAnnotation = (el) => {
for (const clz of el.classList) {
if (clz.startsWith('code-annotation-')) {
return true;
}
}
return false;
}
const clipboard = new window.ClipboardJS('.code-copy-button', {
text: function(trigger) {
const codeEl = trigger.previousElementSibling.cloneNode(true);
for (const childEl of codeEl.children) {
if (isCodeAnnotation(childEl)) {
childEl.remove();
}
}
return codeEl.innerText;
}
});
clipboard.on('success', function(e) {
// button target
const button = e.trigger;
// don't keep focus
button.blur();
// flash "checked"
button.classList.add('code-copy-button-checked');
var currentTitle = button.getAttribute("title");
button.setAttribute("title", "Copied!");
let tooltip;
if (window.bootstrap) {
button.setAttribute("data-bs-toggle", "tooltip");
button.setAttribute("data-bs-placement", "left");
button.setAttribute("data-bs-title", "Copied!");
tooltip = new bootstrap.Tooltip(button,
{ trigger: "manual",
customClass: "code-copy-button-tooltip",
offset: [0, -8]});
tooltip.show();
}
setTimeout(function() {
if (tooltip) {
tooltip.hide();
button.removeAttribute("data-bs-title");
button.removeAttribute("data-bs-toggle");
button.removeAttribute("data-bs-placement");
}
button.setAttribute("title", currentTitle);
button.classList.remove('code-copy-button-checked');
}, 1000);
// clear code selection
e.clearSelection();
});
var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
var mailtoRegex = new RegExp(/^mailto:/);
var filterRegex = new RegExp('/' + window.location.host + '/');
var isInternal = (href) => {
return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
}
// Inspect non-navigation links and adorn them if external
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool)');
for (var i=0; i<links.length; i++) {
const link = links[i];
if (!isInternal(link.href)) {
// undo the damage that might have been done by quarto-nav.js in the case of
// links that we want to consider external
if (link.dataset.originalHref !== undefined) {
link.href = link.dataset.originalHref;
}
}
}
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
const config = {
allowHTML: true,
maxWidth: 500,
delay: 100,
arrow: false,
appendTo: function(el) {
return el.parentElement;
},
interactive: true,
interactiveBorder: 10,
theme: 'quarto',
placement: 'bottom-start',
};
if (contentFn) {
config.content = contentFn;
}
if (onTriggerFn) {
config.onTrigger = onTriggerFn;
}
if (onUntriggerFn) {
config.onUntrigger = onUntriggerFn;
}
window.tippy(el, config);
}
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (var i=0; i<noterefs.length; i++) {
const ref = noterefs[i];
tippyHover(ref, function() {
// use id or data attribute instead here
let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
try { href = new URL(href).hash; } catch {}
const id = href.replace(/^#\/?/, "");
const note = window.document.getElementById(id);
if (note) {
return note.innerHTML;
} else {
return "";
}
});
}
const xrefs = window.document.querySelectorAll('a.quarto-xref');
const processXRef = (id, note) => {
// Strip column container classes
const stripColumnClz = (el) => {
el.classList.remove("page-full", "page-columns");
if (el.children) {
for (const child of el.children) {
stripColumnClz(child);
}
}
}
stripColumnClz(note)
if (id === null || id.startsWith('sec-')) {
// Special case sections, only their first couple elements
const container = document.createElement("div");
if (note.children && note.children.length > 2) {
container.appendChild(note.children[0].cloneNode(true));
for (let i = 1; i < note.children.length; i++) {
const child = note.children[i];
if (child.tagName === "P" && child.innerText === "") {
continue;
} else {
container.appendChild(child.cloneNode(true));
break;
}
}
if (window.Quarto?.typesetMath) {
window.Quarto.typesetMath(container);
}
return container.innerHTML
} else {
if (window.Quarto?.typesetMath) {
window.Quarto.typesetMath(note);
}
return note.innerHTML;
}
} else {
// Remove any anchor links if they are present
const anchorLink = note.querySelector('a.anchorjs-link');
if (anchorLink) {
anchorLink.remove();
}
if (window.Quarto?.typesetMath) {
window.Quarto.typesetMath(note);
}
// TODO in 1.5, we should make sure this works without a callout special case
if (note.classList.contains("callout")) {
return note.outerHTML;
} else {
return note.innerHTML;
}
}
}
for (var i=0; i<xrefs.length; i++) {
const xref = xrefs[i];
tippyHover(xref, undefined, function(instance) {
instance.disable();
let url = xref.getAttribute('href');
let hash = undefined;
if (url.startsWith('#')) {
hash = url;
} else {
try { hash = new URL(url).hash; } catch {}
}
if (hash) {
const id = hash.replace(/^#\/?/, "");
const note = window.document.getElementById(id);
if (note !== null) {
try {
const html = processXRef(id, note.cloneNode(true));
instance.setContent(html);
} finally {
instance.enable();
instance.show();
}
} else {
// See if we can fetch this
fetch(url.split('#')[0])
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.getElementById(id);
if (note !== null) {
const html = processXRef(id, note);
instance.setContent(html);
}
}).finally(() => {
instance.enable();
instance.show();
});
}
} else {
// See if we can fetch a full url (with no hash to target)
// This is a special case and we should probably do some content thinning / targeting
fetch(url)
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.querySelector('main.content');
if (note !== null) {
// This should only happen for chapter cross references
// (since there is no id in the URL)
// remove the first header
if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
note.children[0].remove();
}
const html = processXRef(null, note);
instance.setContent(html);
}
}).finally(() => {
instance.enable();
instance.show();
});
}
}, function(instance) {
});
}
let selectedAnnoteEl;
const selectorForAnnotation = ( cell, annotation) => {
let cellAttr = 'data-code-cell="' + cell + '"';
let lineAttr = 'data-code-annotation="' + annotation + '"';
const selector = 'span[' + cellAttr + '][' + lineAttr + ']';
return selector;
}
const selectCodeLines = (annoteEl) => {
const doc = window.document;
const targetCell = annoteEl.getAttribute("data-target-cell");
const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
const lines = annoteSpan.getAttribute("data-code-lines").split(",");
const lineIds = lines.map((line) => {
return targetCell + "-" + line;
})
let top = null;
let height = null;
let parent = null;
if (lineIds.length > 0) {
//compute the position of the single el (top and bottom and make a div)
const el = window.document.getElementById(lineIds[0]);
top = el.offsetTop;
height = el.offsetHeight;
parent = el.parentElement.parentElement;
if (lineIds.length > 1) {
const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
const bottom = lastEl.offsetTop + lastEl.offsetHeight;
height = bottom - top;
}
if (top !== null && height !== null && parent !== null) {
// cook up a div (if necessary) and position it
let div = window.document.getElementById("code-annotation-line-highlight");
if (div === null) {
div = window.document.createElement("div");
div.setAttribute("id", "code-annotation-line-highlight");
div.style.position = 'absolute';
parent.appendChild(div);
}
div.style.top = top - 2 + "px";
div.style.height = height + 4 + "px";
div.style.left = 0;
let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
if (gutterDiv === null) {
gutterDiv = window.document.createElement("div");
gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
gutterDiv.style.position = 'absolute';
const codeCell = window.document.getElementById(targetCell);
const gutter = codeCell.querySelector('.code-annotation-gutter');
gutter.appendChild(gutterDiv);
}
gutterDiv.style.top = top - 2 + "px";
gutterDiv.style.height = height + 4 + "px";
}
selectedAnnoteEl = annoteEl;
}
};
const unselectCodeLines = () => {
const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
elementsIds.forEach((elId) => {
const div = window.document.getElementById(elId);
if (div) {
div.remove();
}
});
selectedAnnoteEl = undefined;
};
// Handle positioning of the toggle
// On window resize, recompute the code-annotation highlight for the currently
// selected annotation (if any). The handler is wrapped in `throttle` with a
// 10 ms delay.
window.addEventListener(
"resize",
throttle(() => {
// NOTE(review): `elRect` is not declared anywhere in the visible code, so
// this assignment targets an implicit global — confirm whether anything
// still reads it.
elRect = undefined;
if (selectedAnnoteEl) {
selectCodeLines(selectedAnnoteEl);
}
}, 10)
);
// Wrap `fn` so that the first call runs immediately and every later call
// (re)schedules a single delayed run `ms` milliseconds in the future.
// Once that delayed run fires, the wrapper is reset and the next call runs
// immediately again.
//   fn: function to invoke; receives the arguments of the triggering call
//   ms: delay, in milliseconds, before the deferred invocation
// Returns the wrapping function.
function throttle(fn, ms) {
// `throttle` (local flag, shadows the function name inside this scope) is
// true once the leading call has happened; `timer` holds the pending timeout.
let throttle = false;
let timer;
return (...args) => {
if(!throttle) { // first call gets through
fn.apply(this, args);
throttle = true;
} else { // all the others get throttled
if(timer) clearTimeout(timer); // cancel #2
timer = setTimeout(() => {
fn.apply(this, args);
// Reset both the pending timer and the flag in one assignment.
timer = throttle = false;
}, ms);
}
};
}
// Attach click handler to the DT
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
for (const annoteDlNode of annoteDls) {
annoteDlNode.addEventListener('click', (event) => {
const clickedEl = event.target;
if (clickedEl !== selectedAnnoteEl) {
unselectCodeLines();
const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
if (activeEl) {
activeEl.classList.remove('code-annotation-active');
}
selectCodeLines(clickedEl);
clickedEl.classList.add('code-annotation-active');
} else {
// Unselect the line
unselectCodeLines();
clickedEl.classList.remove('code-annotation-active');
}
});
}
const findCites = (el) => {
const parentEl = el.parentElement;
if (parentEl) {
const cites = parentEl.dataset.cites;
if (cites) {
return {
el,
cites: cites.split(' ')
};
} else {
return findCites(el.parentElement)
}
} else {
return undefined;
}
};
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (var i=0; i<bibliorefs.length; i++) {
const ref = bibliorefs[i];
const citeInfo = findCites(ref);
if (citeInfo) {
tippyHover(citeInfo.el, function() {
var popup = window.document.createElement('div');
citeInfo.cites.forEach(function(cite) {
var citeDiv = window.document.createElement('div');
citeDiv.classList.add('hanging-indent');
citeDiv.classList.add('csl-entry');
var biblioDiv = window.document.getElementById('ref-' + cite);
if (biblioDiv) {
citeDiv.innerHTML = biblioDiv.innerHTML;
}
popup.appendChild(citeDiv);
});
return popup.innerHTML;
});
}
}
});
</script>
</div> <!-- /content -->
</body></html>

View file

@ -0,0 +1,62 @@
---
title: Investigation sur l'impact de `max_vem_steps`
format: html
execute:
echo: false
warning: true
---
```{r libraries}
library(colSBM)
library(here)
library(ggplot2)
```
```{r constants}
# ---- Where the results read by this report live ----
# NOTE(review): the simulation script in this commit writes under
# ".../investigating/profiling_clustering" while this report reads from
# ".../investigating/vem_steps" -- confirm which folder holds the .Rds file.
base_folder <- here("code", "results", "investigating", "vem_steps")
if (!dir.exists(base_folder)) {
  dir.create(base_folder, recursive = TRUE)
}

# ---- Experimental design: seeds x epsilons x max_vem_steps ----
net_seed <- 0
test_seeds <- c(12, 3)
epsilons <- c(0.1, 0.4)
vem_steps <- seq(from = 10, to = 300, by = 40)
conditions <- expand.grid(
  seeds = test_seeds,
  epsilons = epsilons,
  vem_steps = vem_steps
)

# ---- Simulation parameters ----
# 3 x 3 baseline matrix filled with 0.3.
base_alpha <- matrix(0.3, nrow = 3L, ncol = 3L)
# NOTE: `pi` masks base R's constant of the same name for the rest of the
# document.
pi <- c(0.3, 0.2, 0.5)
rho <- c(0.55, 0.15, 0.3)
M <- 10L
# First half of the M networks uses the small size, second half the large one.
nr <- rep(c(30L, 95L), each = M / 2L)
nc <- rep(c(40L, 70L), each = M / 2L)
```
```{r}
# Load the results of one simulation run and prepare them for plotting.
df <- readRDS(file.path(base_folder, "impact_vem_1718196265.Rds"))

# eps and seed are experimental factors, not continuous quantities: make them
# discrete so ggplot2 maps them to distinct colours / line types.
df$eps <- factor(df$eps)
df$seed <- factor(df$seed)

# `as.numeric()` on a difftime silently drops its unit (secs, mins, hours...),
# which makes the plotted durations ambiguous. Normalise to seconds first so
# the y axis of the timing plot always has the same meaning.
if (inherits(df$elapsed_time, "difftime")) {
  units(df$elapsed_time) <- "secs"
}
df$elapsed_time <- as.numeric(df$elapsed_time)
```
```{r ari}
# ARI as a function of max_vem_steps: one colour per epsilon, one line type
# per seed.
ggplot(
  data = df,
  mapping = aes(x = max_vem_steps, y = ari, color = eps)
) +
  geom_line(mapping = aes(linetype = seed)) +
  geom_point()
```
```{r time}
# Elapsed time as a function of max_vem_steps: one colour per epsilon, one
# line type per seed.
ggplot(
  data = df,
  mapping = aes(x = max_vem_steps, y = elapsed_time, color = eps)
) +
  geom_line(mapping = aes(linetype = seed)) +
  geom_point()
```

View file

@ -0,0 +1,170 @@
library(colSBM)
library(aricode)
library(here)
# ---- Output location ----
# Every artefact of this run is written below this folder (created on demand).
base_folder <- here("code", "results", "investigating", "profiling_clustering")
if (!dir.exists(base_folder)) {
  dir.create(base_folder, recursive = TRUE)
}

# ---- Experimental design: seeds x epsilons x max_vem_steps ----
net_seed <- 0
test_seeds <- c(12, 3)
epsilons <- c(0.1, 0.4)
vem_steps <- seq(from = 10, to = 300, by = 40)
conditions <- expand.grid(
  seeds = test_seeds,
  epsilons = epsilons,
  vem_steps = vem_steps
)

# ---- Simulation parameters ----
# 3 x 3 baseline matrix filled with 0.3; generate_net() perturbs it with eps.
base_alpha <- matrix(0.3, nrow = 3L, ncol = 3L)
# NOTE: `pi` masks base R's constant of the same name for the rest of the
# script.
pi <- c(0.3, 0.2, 0.5)
rho <- c(0.55, 0.15, 0.3)
M <- 10L
# First half of the M networks uses the small size, second half the large one.
nr <- rep(c(30L, 95L), each = M / 2L)
nc <- rep(c(40L, 70L), each = M / 2L)
#' Simulate a collection of bipartite networks drawn from three structures
#'
#' Perturbs the script-level `base_alpha` with `eps` in three different ways
#' (`as_alpha`, `cp_alpha`, `dis_alpha`) and draws `M` networks from each
#' resulting matrix with `generate_bipartite_collection()`.
#'
#' Reads the script-level objects `base_alpha`, `nr`, `nc`, `pi`, `rho` and
#' `M`. Calls `set.seed()`, so the global RNG state is reset as a side effect.
#'
#' @param eps Numeric scalar: size of the perturbation applied to `base_alpha`.
#' @param net_seed Seed given to `set.seed()` before any network is drawn.
#' @return The concatenation of the three simulated collections, named "0",
#'   "1", ... in generation order (first the `as_alpha` networks, then
#'   `cp_alpha`, then `dis_alpha`).
generate_net <- function(eps, net_seed = 0) {
  stopifnot(is.numeric(eps), length(eps) == 1L, !is.na(eps))
  set.seed(net_seed)

  as_alpha <- base_alpha + matrix(
    c(
      eps, -eps / 2L, -eps / 2L,
      -eps / 2L, eps, -eps / 2L,
      -eps / 2L, -eps / 2L, eps
    ),
    nrow = 3L
  )
  cp_alpha <- base_alpha + matrix(
    c(
      3L * eps / 2L, eps, eps / 2L,
      eps, eps / 2L, 0L,
      eps / 2L, 0L, -eps / 2L
    ),
    nrow = 3L
  )
  dis_alpha <- base_alpha + matrix(
    c(
      -eps / 2L, eps, eps,
      eps, -eps / 2L, eps,
      eps, eps, -eps / 2L
    ),
    nrow = 3L
  )

  # Deliberately unnamed: c() would otherwise prefix element names.
  alphas <- list(as_alpha, cp_alpha, dis_alpha)

  # Presumably the alpha entries are connection probabilities (no
  # `distribution` argument is passed below) -- TODO confirm. Only warn, so
  # that other uses keep working.
  out_of_range <- vapply(
    alphas,
    function(alpha) any(alpha < 0 | alpha > 1),
    logical(1L)
  )
  if (any(out_of_range)) {
    warning(
      "eps = ", eps, " moves some entries of alpha outside [0, 1]",
      call. = FALSE
    )
  }

  # lapply() runs sequentially in list order, so the RNG stream is consumed in
  # the same order as three consecutive calls would consume it.
  collection <- do.call(
    c,
    lapply(alphas, function(alpha) {
      generate_bipartite_collection(
        nr = nr, nc = nc,
        pi = pi, rho = rho,
        alpha = alpha, M = M
      )
    })
  )

  # Consecutive zero-based ids. For M == 10 this gives "0".."29" exactly as the
  # previous hard-coded `10 +` / `20 +` offsets did, but the names stay unique
  # for any M.
  names(collection) <- as.character(seq_along(collection) - 1L)
  collection
}
# One simulated collection per epsilon, all drawn with the same network seed.
# The list is keyed by the epsilon value so workers can look it up by name.
list_collections <- setNames(
  lapply(epsilons, function(current_eps) {
    generate_net(eps = current_eps, net_seed = net_seed)
  }),
  epsilons
)

# Reference partition used for the ARI: three groups of M networks each.
true_clustering <- rep(c(1, 2, 3), each = M)

# Run identifier used in the names of the output files.
# NOTE(review): "%s" may not be a portable format specification -- verify it
# is supported on every platform this script is run on.
begin_time <- format(Sys.time(), "%s")

# Per-run folder receiving one .Rds file per finished condition.
tmp_folder <- file.path(base_folder, paste0("tmp", begin_time))
if (!dir.exists(tmp_folder)) {
  dir.create(tmp_folder, recursive = TRUE)
}
# Run every row of `conditions` (seed x epsilon x max_vem_steps) in a forked
# worker. Each worker clusters the pre-generated collection for its epsilon,
# scores the best partition against `true_clustering` with the ARI, saves a
# one-row data.frame to `tmp_folder` and returns it.
#
# NOTE(review): the outer mclapply() and the inner `nb_cores` both request
# `availableCores(omit = 1L)` workers; if the inner call really forks that
# many processes the machine is oversubscribed and `elapsed_time` is inflated
# -- confirm this is intended.
results <- parallel::mclapply(
  seq_len(nrow(conditions)),
  function(idx) {
    current_seed <- conditions[["seeds"]][idx]
    eps <- conditions[["epsilons"]][idx]
    max_vem_steps <- conditions[["vem_steps"]][idx]
    message("Condition ", idx, " on ", nrow(conditions))

    # Collections are keyed by the epsilon value converted to a string.
    collection <- list_collections[[as.character(eps)]]

    set.seed(current_seed)
    start_time <- Sys.time()
    clust <- clusterize_bipartite_networks(
      netlist = collection, net_id = names(collection),
      colsbm_model = "iid", fit_opts = list(max_vem_steps = max_vem_steps),
      global_opts = list(
        verbosity = 0L,
        nb_cores = parallelly::availableCores(omit = 1L)
      )
    )
    stop_time <- Sys.time()
    # Fix the unit: `stop_time - start_time` picks secs/mins/hours
    # automatically, so different rows could end up in different units.
    elapsed_time <- difftime(stop_time, start_time, units = "secs")

    unlisted_best_partition <- extract_best_bipartite_partition(clust)
    if (!is.list(unlisted_best_partition)) {
      unlisted_best_partition <- list(unlisted_best_partition)
    }

    # Build a vector of network ids whose *names* are the cluster index, then
    # sort by id so that position i describes network i.
    clustering_vec <- sort(unlist(lapply(
      seq_along(unlisted_best_partition),
      function(cluster_idx) {
        ids_nets <- as.numeric(unlisted_best_partition[[cluster_idx]]$net_id)
        names(ids_nets) <- rep(cluster_idx, length(ids_nets))
        ids_nets
      }
    )))
    cluster_membership <- as.numeric(names(clustering_vec))

    # A failing ARI must not lose the whole condition: report it and store NA.
    ari <- tryCatch(
      ARI(cluster_membership, true_clustering),
      error = function(e) {
        message(
          "ARI failed for condition ", idx, ": ", conditionMessage(e)
        )
        NA_real_
      }
    )

    out <- data.frame(
      eps = eps, seed = current_seed,
      max_vem_steps = max_vem_steps,
      ari = ari,
      elapsed_time = elapsed_time,
      start_time = start_time,
      stop_time = stop_time,
      # One column per network (clustering.1, clustering.2, ...).
      clustering = matrix(
        cluster_membership,
        nrow = 1L
      )
    )

    # Checkpoint: keep the row on disk even if the whole run is interrupted.
    saveRDS(out,
      file = file.path(
        tmp_folder,
        paste0("c_", idx, "_on_", nrow(conditions), ".Rds")
      )
    )
    message("Finished condition ", idx)
    out
  },
  mc.cores = parallelly::availableCores(omit = 1L)
)
# Combine the per-condition rows and save them as a single data.frame.
# mclapply() does not stop on worker failure: a failed condition comes back
# as a "try-error" object (or NULL if the worker died), which would corrupt
# or break rbind(). Keep only the genuine result rows and report the rest.
succeeded <- vapply(results, is.data.frame, logical(1L))
if (!any(succeeded)) {
  stop("No condition finished successfully; nothing to save.", call. = FALSE)
}
if (!all(succeeded)) {
  warning(
    sum(!succeeded), " of ", length(results),
    " conditions failed and were dropped: ",
    paste(which(!succeeded), collapse = ", "),
    call. = FALSE
  )
}
to_save <- do.call(rbind, results[succeeded])
filename_to_save <- paste0("impact_vem_", begin_time, ".Rds")
saveRDS(to_save, file = file.path(base_folder, filename_to_save))