From 78d46c5be5ee51da66d959ff813952d9b05d541a Mon Sep 17 00:00:00 2001 From: Louis Lacoste Date: Sat, 13 Dec 2025 15:31:02 +0100 Subject: [PATCH] Init sim R --- simulation_RL_V2.R | 307 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 307 insertions(+) create mode 100644 simulation_RL_V2.R diff --git a/simulation_RL_V2.R b/simulation_RL_V2.R new file mode 100644 index 0000000..edc4bef --- /dev/null +++ b/simulation_RL_V2.R @@ -0,0 +1,307 @@ +###################### simulation q learning ############################# +# Install and load required libraries +# Packages nécessaires +if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr") +if (!requireNamespace("ggplot2", quietly = TRUE)) install.packages("ggplot2") +library(dplyr) +library(ggplot2) +library(tidyr) + +######################## selon l'article rat ############################ + +# Paramètres d'apprentissage (indépendants pour chaque option) +alpha_g <- rep(0.8, 4) # Taux d'apprentissage pour les gains (pour chaque option) +alpha_l <- rep(0.8, 4) # Taux d'apprentissage pour les pertes (pour chaque option) +lambda_g <- rep(1, 4) # Poids pour les gains (individuel pour chaque option) +lambda_l <- rep(1, 4) # Poids pour les pertes (individuel pour chaque option) +fl <- rep(0.8, 4) # Facteurs d'oubli pertes (spécifique pour chaque option) remplace les alpha pour les options non choisi +fg <- rep(0.8, 4) # Facteurs d'oubli gains (spécifique pour chaque option) remplace les alpha pour les options non choisi + +n_choices <- 500 # Nombre total de choix + +# Paramètres des options (récompenses) +options <- list( + option1 = list( + gain = sample(3:4, n_choices, replace = TRUE), + loss = sample(-9:-8, n_choices, replace = TRUE), + jp = 3000, bs = 0, p_jp = 0.01, p_bs = 0 + ), # Antifragile + option2 = list( + gain = sample(8:9, n_choices, replace = TRUE), + loss = sample(-9:-8, n_choices, replace = TRUE), + jp = 0, bs = 0, p_jp = 0, p_bs = 0 + ), # Robuste + option3 = list( + gain = sample(8:9, n_choices, replace = TRUE), + loss = sample(-3:-4, n_choices, replace = TRUE), + jp = 0, bs = -3000, p_jp = 0, p_bs = 0.01 + ), # Fragile + option4 = list( + gain = sample(3:4, n_choices, replace = TRUE), + loss = sample(-3:-4, n_choices, replace = TRUE), + jp = 3000, bs = -3000, p_jp = 0.01, p_bs = 0.01 + ) # Vulnerable +) + +simulation_agentRL <- function(alpha_g, alpha_l, lambda_g, lambda_l, fg, fl, n_choices, options) { + # Initialisation des Q-values pour chaque option (gains et pertes séparés) + Q1_gain <- 0 + Q2_gain <- 0 + Q3_gain <- 0 + Q4_gain <- 0 + Q1_loss <- 0 + Q2_loss <- 0 + Q3_loss <- 0 + Q4_loss <- 0 + + # Historique des choix, outcome et Q value + choices_history <- integer(n_choices) + rewards_history <- numeric(n_choices) + Q1_gain_history <- numeric(n_choices) + Q2_gain_history <- numeric(n_choices) + Q3_gain_history <- numeric(n_choices) + Q4_gain_history <- numeric(n_choices) + Q1_loss_history <- numeric(n_choices) + Q2_loss_history <- numeric(n_choices) + Q3_loss_history <- numeric(n_choices) + Q4_loss_history <- numeric(n_choices) + + # Simulation du processus d'apprentissage + for (t in 1:n_choices) { + # Calcul des valeurs V pour chaque option + V1 <- lambda_g[1] * Q1_gain + lambda_l[1] * Q1_loss + V2 <- lambda_g[2] * Q2_gain + lambda_l[2] * Q2_loss + V3 <- lambda_g[3] * Q3_gain + lambda_l[3] * Q3_loss + V4 <- lambda_g[4] * Q4_gain + lambda_l[4] * Q4_loss + print(c(V1, V2, V3, V4)) + + # Calcul des valeurs exponentielles de chaque option + exp_V1 <- exp(V1) + if (is.infinite(exp_V1)) { + exp_V1 <- .Machine$double.xmax + } + exp_V2 <- exp(V2) + if (is.infinite(exp_V2)) { + exp_V2 <- .Machine$double.xmax + } + exp_V3 <- exp(V3) + if (is.infinite(exp_V3)) { + exp_V3 <- .Machine$double.xmax + } + exp_V4 <- exp(V4) + if (is.infinite(exp_V4)) { + exp_V4 <- .Machine$double.xmax + } + + # Somme des valeurs exponentielles + sum_exp_V <- exp_V1 + exp_V2 + exp_V3 + exp_V4 + + # Probabilités pour chaque option + p1 <- exp_V1 / sum_exp_V + p2 <- exp_V2 / sum_exp_V + p3 <- exp_V3 / sum_exp_V + p4 <- exp_V4 / sum_exp_V + + # Création du vecteur de probabilités + probabilities <- c(p1, p2, p3, p4) + print(probabilities) + + # Choix d'une option en fonction des probabilités / ici c'est là ou je pourrais ajouter une boucle if avec epsilon greedy + choice <- sample(1:4, 1, prob = probabilities) + choices_history[t] <- choice + # enregistre les Q value + Q1_gain_history[t] <- Q1_gain + Q2_gain_history[t] <- Q2_gain + Q3_gain_history[t] <- Q3_gain + Q4_gain_history[t] <- Q4_gain + Q1_loss_history[t] <- Q1_loss + Q2_loss_history[t] <- Q2_loss + Q3_loss_history[t] <- Q3_loss + Q4_loss_history[t] <- Q4_loss + + # Sélection de l'option choisie et calcul de la récompense + selected_option <- options[[paste0("option", choice)]] # ou juste choice normalement ça devrait marcher et me prendre l'indice correspondant + + reward <- if (runif(1) < selected_option$p_jp) { + selected_option$jp # Gain extrême (JP) + } else if (runif(1) < selected_option$p_bs) { + selected_option$bs # Perte extrême (BS) + } else if (runif(1) < 0.5) { + selected_option$gain[t] # Gain normal + } else { + selected_option$loss[t] # Perte normale + } + rewards_history[t] <- reward + + # Mise à jour des Q-values pour l'option choisie + if (choice == 1) { + if (reward > 0) { + Q1_gain <- Q1_gain + alpha_g[1] * (reward - Q1_gain) + } else { + Q1_loss <- Q1_loss + alpha_l[1] * (reward - Q1_loss) + } + } else if (choice == 2) { + if (reward > 0) { + Q2_gain <- Q2_gain + alpha_g[2] * (reward - Q2_gain) + } else { + Q2_loss <- Q2_loss + alpha_l[2] * (reward - Q2_loss) + } + } else if (choice == 3) { + if (reward > 0) { + Q3_gain <- Q3_gain + alpha_g[3] * (reward - Q3_gain) + } else { + Q3_loss <- Q3_loss + alpha_l[3] * (reward - Q3_loss) + } + } else if (choice == 4) { + if (reward > 0) { + Q4_gain <- Q4_gain + alpha_g[4] * (reward - Q4_gain) + } else { + Q4_loss <- Q4_loss + alpha_l[4] * (reward - Q4_loss) + } + } + + # Mise à jour des Q-values pour les options non choisies avec facteur d'oubli + if (choice != 1) { + Q1_gain <- Q1_gain * (1 - fg[1]) + Q1_loss <- Q1_loss * (1 - fl[1]) + } + if (choice != 2) { + Q2_gain <- Q2_gain * (1 - fg[2]) + Q2_loss <- Q2_loss * (1 - fl[2]) + } + if (choice != 3) { + Q3_gain <- Q3_gain * (1 - fg[3]) + Q3_loss <- Q3_loss * (1 - fl[3]) + } + if (choice != 4) { + Q4_gain <- Q4_gain * (1 - fg[4]) + Q4_loss <- Q4_loss * (1 - fl[4]) + } + } + + # Calcul de la proportion cumulée des choix pour chaque option au cours du temps + proportions_data <- data.frame( + Iteration = 1:n_choices, + Antifragile = cumsum(choices_history == 1) / 1:n_choices, + Robust = cumsum(choices_history == 2) / 1:n_choices, + Fragil = cumsum(choices_history == 3) / 1:n_choices, + Vulnerable = cumsum(choices_history == 4) / 1:n_choices + ) + result <- list( + proportions_data = proportions_data, rewards_history = rewards_history, choices_history = choices_history, + Q1_gain_history = Q1_gain_history, Q2_gain_history = Q2_gain_history, Q3_gain_history = Q3_gain_history, Q4_gain_history = Q4_gain_history, + Q1_loss_history = Q1_loss_history, Q2_loss_history = Q2_loss_history, Q3_loss_history = Q3_loss_history, Q4_loss_history = Q4_loss_history + ) + + return(result) +} + +#### pour un agent RL +result <- simulation_agentRL(alpha_g, alpha_l, lambda_g, lambda_l, fg, fl, n_choices, options) +proportions_data <- result$proportions_data +# Conversion des données pour ggplot +proportions_long <- reshape2::melt(proportions_data, + id.vars = "Iteration", + variable.name = "Option", value.name = "Proportion" +) + +# Tracé du graphique +ggplot(proportions_long, aes(x = Iteration, y = Proportion, color = Option)) + + geom_line(size = 1.2) + + labs( + title = "Proportion of simulated choices through trials", + x = "trials", + y = "Proportion of choices" + ) + + scale_color_manual(values = c( + "Antifragile" = "blue", "Robust" = "red", + "Fragil" = "green", "Vulnerable" = "purple" + )) + + theme_minimal() + + theme(legend.title = element_blank()) + +### pour plusieurs agents RL + +n_agent <- 100 + +result_multi_agent <- lapply(1:n_agent, function(i) { + # Paramètres d'apprentissage (indépendants pour chaque option) + alpha_g <- rep(0.9, 4) # Taux d'apprentissage pour les gains (pour chaque option) + alpha_l <- rep(0.9, 4) # Taux d'apprentissage pour les pertes (pour chaque option) + lambda_g <- rep(1, 4) # Poids pour les gains (individuel pour chaque option) + lambda_l <- rep(1, 4) # Poids pour les pertes (individuel pour chaque option) + fg <- rep(0.9, 4) # Facteurs d'oubli gains (spécifique pour chaque option) remplace les alpha pour les options non choisi + fl <- rep(0.9, 4) # Facteurs d'oubli pertes (spécifique pour chaque option) remplace les alpha pour les options non choisi + + n_choices <- 1000 # Nombre total de choix + + # Paramètres des options (récompenses) + options <- list( + option1 = list( + gain = sample(3:4, n_choices, replace = TRUE), + loss = sample(-9:-8, n_choices, replace = TRUE), + jp = 3000, bs = 0, p_jp = 0.05, p_bs = 0 + ), + option2 = list( + gain = sample(8:9, n_choices, replace = TRUE), + loss = sample(-9:-8, n_choices, replace = TRUE), + jp = 0, bs = 0, p_jp = 0, p_bs = 0 + ), + option3 = list( + gain = sample(8:9, n_choices, replace = TRUE), + loss = sample(-3:-4, n_choices, replace = TRUE), + jp = 0, bs = -3000, p_jp = 0, p_bs = 0.05 + ), + option4 = list( + gain = sample(3:4, n_choices, replace = TRUE), + loss = sample(-3:-4, n_choices, replace = TRUE), + jp = 3000, bs = -3000, p_jp = 0.05, p_bs = 0.05 + ) + ) + + result <- simulation_agentRL(alpha_g, alpha_l, lambda_g, lambda_l, fg, fl, n_choices, options) + result$parameters <- list(alpha_g = alpha_g, alpha_l = alpha_l, lambda_g = lambda_g, lambda_l = lambda_l, fg = fg, fl = fl) + result$option <- options + + return(result) +}) + +proportions_data_multi_agent <- do.call("rbind", lapply(seq_along(result_multi_agent), function(i) { + # Ici on récupère les données de la simu i + current_proportions_data <- result_multi_agent[[i]]$proportions_data + current_proportions_data$repetition <- i + current_proportions_data +})) + +## plot des comportements moyens TSREE OSREE +TSREE <- rep(0, length(unique(proportions_data_multi_agent$repetition))) # axe des y +OSSREE <- rep(0, length(unique(proportions_data_multi_agent$repetition))) # axe des x +for (i in unique(proportions_data_multi_agent$repetition)) { + OSSREE[i] <- mean(proportions_data_multi_agent[proportions_data_multi_agent$repetition == i, ]$Vulnerable) - mean(proportions_data_multi_agent[proportions_data_multi_agent$repetition == i, ]$Robust) # f vulnérable - f robuste + TSREE[i] <- 1 + mean(proportions_data_multi_agent[proportions_data_multi_agent$repetition == i, ]$Antifragile) - mean(proportions_data_multi_agent[proportions_data_multi_agent$repetition == i, ]$Fragil) # 1 + f antifragile - f fragile +} + +plot(OSSREE, TSREE, col = "darkblue", pch = "+", cex = 2, xlim = c(-1, 1), ylim = c(0, 2)) +lines(c(0, 1, 0, -1, 0), c(0, 1, 2, 1, 0)) +lines(c(0, 0), c(0, 2), lty = 2) +lines(c(-1, 1), c(1, 1), lty = 2) + +# llabels=seq(1,length(OSSREE)) +# for (i in 1:length(OSSREE)){text(OSSREE[i],TSREE[i]-.1,llabels[i])} + +## plot des proportions en fonctions des trials +summarised_proportions_data <- proportions_data_multi_agent %>% + group_by(Iteration) %>% + summarise_at(vars(Antifragile:Vulnerable), list(mean = mean, sd = sd)) %>% + pivot_longer(cols = Antifragile_mean:Vulnerable_sd, names_to = c("Option", ".value"), names_sep = "_") %>% + mutate(CI_Upper = mean + 1.96 * sd / sqrt(n_agent), CI_Lower = mean - 1.96 * sd / sqrt(n_agent)) + +ggplot(summarised_proportions_data, aes(x = Iteration, y = mean, color = Option)) + + geom_line(size = 1) + + geom_ribbon(aes(ymin = CI_Lower, ymax = CI_Upper, fill = Option), alpha = 0.2) + + labs( + title = "Simulation of choices through trials", + x = "trials", y = "proportion of choices" + ) + + # xlim(0,50)+ + theme_minimal()