From 78d46c5be5ee51da66d959ff813952d9b05d541a Mon Sep 17 00:00:00 2001
From: Louis Lacoste <louis.lacoste@agroparistech.fr>
Date: Sat, 13 Dec 2025 15:31:02 +0100
Subject: [PATCH] Init sim R

---
 simulation_RL_V2.R | 307 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 307 insertions(+)
 create mode 100644 simulation_RL_V2.R

diff --git a/simulation_RL_V2.R b/simulation_RL_V2.R
new file mode 100644
index 0000000..edc4bef
--- /dev/null
+++ b/simulation_RL_V2.R
@@ -0,0 +1,307 @@
+###################### simulation q learning #############################
+# Install and load required libraries
+# Packages nécessaires
+if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")
+if (!requireNamespace("ggplot2", quietly = TRUE)) install.packages("ggplot2")
+library(dplyr)
+library(ggplot2)
+library(tidyr)
+
+######################## selon l'article rat ############################
+
+# Paramètres d'apprentissage (indépendants pour chaque option)
+alpha_g <- rep(0.8, 4) # Taux d'apprentissage pour les gains (pour chaque option)
+alpha_l <- rep(0.8, 4) # Taux d'apprentissage pour les pertes (pour chaque option)
+lambda_g <- rep(1, 4) # Poids pour les gains (individuel pour chaque option)
+lambda_l <- rep(1, 4) # Poids pour les pertes (individuel pour chaque option)
+fl <- rep(0.8, 4) # Facteurs d'oubli pertes (spécifique pour chaque option) remplace les alpha pour les options non choisi
+fg <- rep(0.8, 4) # Facteurs d'oubli gains (spécifique pour chaque option) remplace les alpha pour les options non choisi
+
+n_choices <- 500 # Nombre total de choix
+
+# Paramètres des options (récompenses)
+options <- list(
+  option1 = list(
+    gain = sample(3:4, n_choices, replace = TRUE),
+    loss = sample(-9:-8, n_choices, replace = TRUE),
+    jp = 3000, bs = 0, p_jp = 0.01, p_bs = 0
+  ), # Antifragile
+  option2 = list(
+    gain = sample(8:9, n_choices, replace = TRUE),
+    loss = sample(-9:-8, n_choices, replace = TRUE),
+    jp = 0, bs = 0, p_jp = 0, p_bs = 0
+  ), # Robuste
+  option3 = list(
+    gain = sample(8:9, n_choices, replace = TRUE),
+    loss = sample(-3:-4, n_choices, replace = TRUE),
+    jp = 0, bs = -3000, p_jp = 0, p_bs = 0.01
+  ), # Fragile
+  option4 = list(
+    gain = sample(3:4, n_choices, replace = TRUE),
+    loss = sample(-3:-4, n_choices, replace = TRUE),
+    jp = 3000, bs = -3000, p_jp = 0.01, p_bs = 0.01
+  ) # Vulnerable
+)
+
+simulation_agentRL <- function(alpha_g, alpha_l, lambda_g, lambda_l, fg, fl, n_choices, options) {
+  # Initialisation des Q-values pour chaque option (gains et pertes séparés)
+  Q1_gain <- 0
+  Q2_gain <- 0
+  Q3_gain <- 0
+  Q4_gain <- 0
+  Q1_loss <- 0
+  Q2_loss <- 0
+  Q3_loss <- 0
+  Q4_loss <- 0
+
+  # Historique des choix, outcome et Q value
+  choices_history <- integer(n_choices)
+  rewards_history <- numeric(n_choices)
+  Q1_gain_history <- numeric(n_choices)
+  Q2_gain_history <- numeric(n_choices)
+  Q3_gain_history <- numeric(n_choices)
+  Q4_gain_history <- numeric(n_choices)
+  Q1_loss_history <- numeric(n_choices)
+  Q2_loss_history <- numeric(n_choices)
+  Q3_loss_history <- numeric(n_choices)
+  Q4_loss_history <- numeric(n_choices)
+
+  # Simulation du processus d'apprentissage
+  for (t in 1:n_choices) {
+    # Calcul des valeurs V pour chaque option
+    V1 <- lambda_g[1] * Q1_gain + lambda_l[1] * Q1_loss
+    V2 <- lambda_g[2] * Q2_gain + lambda_l[2] * Q2_loss
+    V3 <- lambda_g[3] * Q3_gain + lambda_l[3] * Q3_loss
+    V4 <- lambda_g[4] * Q4_gain + lambda_l[4] * Q4_loss
+    print(c(V1, V2, V3, V4))
+
+    # Calcul des valeurs exponentielles de chaque option
+    exp_V1 <- exp(V1)
+    if (is.infinite(exp_V1)) {
+      exp_V1 <- .Machine$double.xmax
+    }
+    exp_V2 <- exp(V2)
+    if (is.infinite(exp_V2)) {
+      exp_V2 <- .Machine$double.xmax
+    }
+    exp_V3 <- exp(V3)
+    if (is.infinite(exp_V3)) {
+      exp_V3 <- .Machine$double.xmax
+    }
+    exp_V4 <- exp(V4)
+    if (is.infinite(exp_V4)) {
+      exp_V4 <- .Machine$double.xmax
+    }
+
+    # Somme des valeurs exponentielles
+    sum_exp_V <- exp_V1 + exp_V2 + exp_V3 + exp_V4
+
+    # Probabilités pour chaque option
+    p1 <- exp_V1 / sum_exp_V
+    p2 <- exp_V2 / sum_exp_V
+    p3 <- exp_V3 / sum_exp_V
+    p4 <- exp_V4 / sum_exp_V
+
+    # Création du vecteur de probabilités
+    probabilities <- c(p1, p2, p3, p4)
+    print(probabilities)
+
+    # Choix d'une option en fonction des probabilités / ici c'est là ou je pourrais ajouter une boucle if avec epsilon greedy
+    choice <- sample(1:4, 1, prob = probabilities)
+    choices_history[t] <- choice
+    # enregistre les Q value
+    Q1_gain_history[t] <- Q1_gain
+    Q2_gain_history[t] <- Q2_gain
+    Q3_gain_history[t] <- Q3_gain
+    Q4_gain_history[t] <- Q4_gain
+    Q1_loss_history[t] <- Q1_loss
+    Q2_loss_history[t] <- Q2_loss
+    Q3_loss_history[t] <- Q3_loss
+    Q4_loss_history[t] <- Q4_loss
+
+    # Sélection de l'option choisie et calcul de la récompense
+    selected_option <- options[[paste0("option", choice)]] # ou juste choice normalement ça devrait marcher et me prendre l'indice correspondant
+
+    reward <- if (runif(1) < selected_option$p_jp) {
+      selected_option$jp # Gain extrême (JP)
+    } else if (runif(1) < selected_option$p_bs) {
+      selected_option$bs # Perte extrême (BS)
+    } else if (runif(1) < 0.5) {
+      selected_option$gain[t] # Gain normal
+    } else {
+      selected_option$loss[t] # Perte normale
+    }
+    rewards_history[t] <- reward
+
+    # Mise à jour des Q-values pour l'option choisie
+    if (choice == 1) {
+      if (reward > 0) {
+        Q1_gain <- Q1_gain + alpha_g[1] * (reward - Q1_gain)
+      } else {
+        Q1_loss <- Q1_loss + alpha_l[1] * (reward - Q1_loss)
+      }
+    } else if (choice == 2) {
+      if (reward > 0) {
+        Q2_gain <- Q2_gain + alpha_g[2] * (reward - Q2_gain)
+      } else {
+        Q2_loss <- Q2_loss + alpha_l[2] * (reward - Q2_loss)
+      }
+    } else if (choice == 3) {
+      if (reward > 0) {
+        Q3_gain <- Q3_gain + alpha_g[3] * (reward - Q3_gain)
+      } else {
+        Q3_loss <- Q3_loss + alpha_l[3] * (reward - Q3_loss)
+      }
+    } else if (choice == 4) {
+      if (reward > 0) {
+        Q4_gain <- Q4_gain + alpha_g[4] * (reward - Q4_gain)
+      } else {
+        Q4_loss <- Q4_loss + alpha_l[4] * (reward - Q4_loss)
+      }
+    }
+
+    # Mise à jour des Q-values pour les options non choisies avec facteur d'oubli
+    if (choice != 1) {
+      Q1_gain <- Q1_gain * (1 - fg[1])
+      Q1_loss <- Q1_loss * (1 - fl[1])
+    }
+    if (choice != 2) {
+      Q2_gain <- Q2_gain * (1 - fg[2])
+      Q2_loss <- Q2_loss * (1 - fl[2])
+    }
+    if (choice != 3) {
+      Q3_gain <- Q3_gain * (1 - fg[3])
+      Q3_loss <- Q3_loss * (1 - fl[3])
+    }
+    if (choice != 4) {
+      Q4_gain <- Q4_gain * (1 - fg[4])
+      Q4_loss <- Q4_loss * (1 - fl[4])
+    }
+  }
+
+  # Calcul de la proportion cumulée des choix pour chaque option au cours du temps
+  proportions_data <- data.frame(
+    Iteration = 1:n_choices,
+    Antifragile = cumsum(choices_history == 1) / 1:n_choices,
+    Robust = cumsum(choices_history == 2) / 1:n_choices,
+    Fragil = cumsum(choices_history == 3) / 1:n_choices,
+    Vulnerable = cumsum(choices_history == 4) / 1:n_choices
+  )
+  result <- list(
+    proportions_data = proportions_data, rewards_history = rewards_history, choices_history = choices_history,
+    Q1_gain_history = Q1_gain_history, Q2_gain_history = Q2_gain_history, Q3_gain_history = Q3_gain_history, Q4_gain_history = Q4_gain_history,
+    Q1_loss_history = Q1_loss_history, Q2_loss_history = Q2_loss_history, Q3_loss_history = Q3_loss_history, Q4_loss_history = Q4_loss_history
+  )
+
+  return(result)
+}
+
+#### pour un agent RL
+result <- simulation_agentRL(alpha_g, alpha_l, lambda_g, lambda_l, fg, fl, n_choices, options)
+proportions_data <- result$proportions_data
+# Conversion des données pour ggplot
+proportions_long <- reshape2::melt(proportions_data,
+  id.vars = "Iteration",
+  variable.name = "Option", value.name = "Proportion"
+)
+
+# Tracé du graphique
+ggplot(proportions_long, aes(x = Iteration, y = Proportion, color = Option)) +
+  geom_line(size = 1.2) +
+  labs(
+    title = "Proportion of simulated choices through trials",
+    x = "trials",
+    y = "Proportion of choices"
+  ) +
+  scale_color_manual(values = c(
+    "Antifragile" = "blue", "Robust" = "red",
+    "Fragil" = "green", "Vulnerable" = "purple"
+  )) +
+  theme_minimal() +
+  theme(legend.title = element_blank())
+
+### pour plusieurs agents RL
+
+n_agent <- 100
+
+result_multi_agent <- lapply(1:n_agent, function(i) {
+  # Paramètres d'apprentissage (indépendants pour chaque option)
+  alpha_g <- rep(0.9, 4) # Taux d'apprentissage pour les gains (pour chaque option)
+  alpha_l <- rep(0.9, 4) # Taux d'apprentissage pour les pertes (pour chaque option)
+  lambda_g <- rep(1, 4) # Poids pour les gains (individuel pour chaque option)
+  lambda_l <- rep(1, 4) # Poids pour les pertes (individuel pour chaque option)
+  fg <- rep(0.9, 4) # Facteurs d'oubli gains (spécifique pour chaque option) remplace les alpha pour les options non choisi
+  fl <- rep(0.9, 4) # Facteurs d'oubli pertes (spécifique pour chaque option) remplace les alpha pour les options non choisi
+
+  n_choices <- 1000 # Nombre total de choix
+
+  # Paramètres des options (récompenses)
+  options <- list(
+    option1 = list(
+      gain = sample(3:4, n_choices, replace = TRUE),
+      loss = sample(-9:-8, n_choices, replace = TRUE),
+      jp = 3000, bs = 0, p_jp = 0.05, p_bs = 0
+    ),
+    option2 = list(
+      gain = sample(8:9, n_choices, replace = TRUE),
+      loss = sample(-9:-8, n_choices, replace = TRUE),
+      jp = 0, bs = 0, p_jp = 0, p_bs = 0
+    ),
+    option3 = list(
+      gain = sample(8:9, n_choices, replace = TRUE),
+      loss = sample(-3:-4, n_choices, replace = TRUE),
+      jp = 0, bs = -3000, p_jp = 0, p_bs = 0.05
+    ),
+    option4 = list(
+      gain = sample(3:4, n_choices, replace = TRUE),
+      loss = sample(-3:-4, n_choices, replace = TRUE),
+      jp = 3000, bs = -3000, p_jp = 0.05, p_bs = 0.05
+    )
+  )
+
+  result <- simulation_agentRL(alpha_g, alpha_l, lambda_g, lambda_l, fg, fl, n_choices, options)
+  result$parameters <- list(alpha_g = alpha_g, alpha_l = alpha_l, lambda_g = lambda_g, lambda_l = lambda_l, fg = fg, fl = fl)
+  result$option <- options
+
+  return(result)
+})
+
+proportions_data_multi_agent <- do.call("rbind", lapply(seq_along(result_multi_agent), function(i) {
+  # Ici on récupère les données de la simu i
+  current_proportions_data <- result_multi_agent[[i]]$proportions_data
+  current_proportions_data$repetition <- i
+  current_proportions_data
+}))
+
+## plot des comportements moyens TSREE OSREE
+TSREE <- rep(0, length(unique(proportions_data_multi_agent$repetition))) # axe des y
+OSSREE <- rep(0, length(unique(proportions_data_multi_agent$repetition))) # axe des x
+for (i in unique(proportions_data_multi_agent$repetition)) {
+  OSSREE[i] <- mean(proportions_data_multi_agent[proportions_data_multi_agent$repetition == i, ]$Vulnerable) - mean(proportions_data_multi_agent[proportions_data_multi_agent$repetition == i, ]$Robust) # f vulnérable - f robuste
+  TSREE[i] <- 1 + mean(proportions_data_multi_agent[proportions_data_multi_agent$repetition == i, ]$Antifragile) - mean(proportions_data_multi_agent[proportions_data_multi_agent$repetition == i, ]$Fragil) # 1 + f antifragile - f fragile
+}
+
+plot(OSSREE, TSREE, col = "darkblue", pch = "+", cex = 2, xlim = c(-1, 1), ylim = c(0, 2))
+lines(c(0, 1, 0, -1, 0), c(0, 1, 2, 1, 0))
+lines(c(0, 0), c(0, 2), lty = 2)
+lines(c(-1, 1), c(1, 1), lty = 2)
+
+# llabels=seq(1,length(OSSREE))
+# for (i in 1:length(OSSREE)){text(OSSREE[i],TSREE[i]-.1,llabels[i])}
+
+## plot des proportions en fonctions des trials
+summarised_proportions_data <- proportions_data_multi_agent %>%
+  group_by(Iteration) %>%
+  summarise_at(vars(Antifragile:Vulnerable), list(mean = mean, sd = sd)) %>%
+  pivot_longer(cols = Antifragile_mean:Vulnerable_sd, names_to = c("Option", ".value"), names_sep = "_") %>%
+  mutate(CI_Upper = mean + 1.96 * sd / sqrt(n_agent), CI_Lower = mean - 1.96 * sd / sqrt(n_agent))
+
+ggplot(summarised_proportions_data, aes(x = Iteration, y = mean, color = Option)) +
+  geom_line(size = 1) +
+  geom_ribbon(aes(ymin = CI_Lower, ymax = CI_Upper, fill = Option), alpha = 0.2) +
+  labs(
+    title = "Simulation of choices through trials",
+    x = "trials", y = "proportion of choices"
+  ) +
+  # xlim(0,50)+
+  theme_minimal()