diff --git a/simulation_RL_V2.R b/simulation_RL_V2.R index edc4bef..93f92a5 100644 --- a/simulation_RL_V2.R +++ b/simulation_RL_V2.R @@ -43,6 +43,255 @@ options <- list( ) # Vulnerable ) +#' This function is the actual runner for the simulation with the params +#' provided by other functions that will prepare the parameters to be run by +#' this one +simulation_runner_RL <- function(n_choices, options, params, model_name = "undefined") { + # Nombre d'options + n_arms <- length(options) + + # Initialisation + Q_values <- rep(0, n_arms) + Q_values_history <- matrix(NA_real_, nrow = n_choices, ncol = n_arms) + colnames(Q_values_history) <- paste0("Q", seq_len(n_arms)) + + choices_history <- integer(n_choices) + rewards_history <- numeric(n_choices) + probs_history <- matrix(NA_real_, nrow = n_choices, ncol = n_arms) + colnames(probs_history) <- paste0("p", seq_len(n_arms)) + + # Récupération des paramètres depuis la liste params + alphas <- params$alphas + forgets <- params$forgets + lambdas <- params$lambdas + rhos <- params$rhos + + # Normaliser les formats: si scalar, étendre à n_arms + expand_param <- function(x, default = 0) { + if (is.null(x)) { + return(rep(default, n_arms)) + } + if (length(x) == 1) { + return(rep(unname(x), n_arms)) + } + if (!is.null(names(x)) && all(grepl("^lambda_?", names(x)))) { + # ordered lambda_1..lambda_n + v <- as.numeric(x) + return(v[1:n_arms]) + } + return(as.numeric(x)[1:n_arms]) + } + + lambda_vec <- expand_param(lambdas, default = 1) + + # forgets may be named 'forget' or per-arm + forget_vec <- expand_param(forgets, default = 0) + + # Alphas more complexe: can be a single 'alpha', two (alpha_loss, alpha_gain) or vectors per arm + # We store separate gain and loss vectors for easy lookup + if (!is.null(alphas)) { + if (!is.null(names(alphas)) && "alpha" %in% names(alphas) && length(alphas) == 1) { + alpha_gain_vec <- alpha_loss_vec <- rep(unname(alphas["alpha"]), n_arms) + } else if (!is.null(names(alphas)) && all(c("alpha_loss", "alpha_gain") %in% names(alphas)) && length(alphas) == 2) { + alpha_loss_vec <- rep(unname(alphas["alpha_loss"]), n_arms) + alpha_gain_vec <- rep(unname(alphas["alpha_gain"]), n_arms) + } else if (!is.null(names(alphas)) && any(grepl("^alpha_gain", names(alphas)))) { + # per-arm alpha_gain_1..4 and alpha_loss_1..4 + # fallback to numeric + alpha_gain_vec <- expand_param(alphas[grepl("gain", names(alphas))], default = 0.1) + alpha_loss_vec <- expand_param(alphas[grepl("loss", names(alphas))], default = 0.1) + } else if (length(alphas) == n_arms) { + # assume same for gain and loss if vector provided + alpha_gain_vec <- alpha_loss_vec <- as.numeric(alphas) + } else { + alpha_gain_vec <- alpha_loss_vec <- rep(as.numeric(alphas[1]), n_arms) + } + } else { + alpha_gain_vec <- alpha_loss_vec <- rep(0.1, n_arms) + } + + # rhos: named vector with rho_BS and rho_JP optionally + rho_JP_val <- 0 + rho_BS_val <- 0 + if (!is.null(rhos)) { + if (!is.null(names(rhos)) && "rho_JP" %in% names(rhos)) rho_JP_val <- as.numeric(rhos["rho_JP"]) + if (!is.null(names(rhos)) && "rho_BS" %in% names(rhos)) rho_BS_val <- as.numeric(rhos["rho_BS"]) + } + + # Simulation loop + for (t in seq_len(n_choices)) { + # Compute subjective values + V_values <- lambda_vec * Q_values + + # Add rhos according to the simulation file option mapping: + # option1 = Antifragile (JP possible) + # option2 = Robust + # option3 = Fragile (BS possible) + # option4 = Vulnerable (both) + if (!is.null(rhos)) { + V_values[1] <- V_values[1] + rho_JP_val + if (n_arms >= 3) V_values[3] <- V_values[3] + rho_BS_val + if (n_arms >= 4) V_values[4] <- V_values[4] + rho_BS_val + rho_JP_val + } + + # Softmax (numerical stability) + V_max <- max(V_values) + exp_V <- exp(V_values - V_max) + probs <- exp_V / sum(exp_V) + probs <- pmax(probs, 1e-10) + probs <- probs / sum(probs) + + # Draw choice + choice <- sample(seq_len(n_arms), size = 1, prob = probs) + + # Draw reward according to option structure + opt <- options[[choice]] + u <- runif(1) + jp_p <- ifelse(is.null(opt$p_jp), 0, opt$p_jp) + bs_p <- ifelse(is.null(opt$p_bs), 0, opt$p_bs) + + if (u < jp_p) { + reward <- ifelse(is.null(opt$jp), 0, opt$jp) + } else if (u < jp_p + bs_p) { + reward <- ifelse(is.null(opt$bs), 0, opt$bs) + } else { + # normal outcome: either gain or loss + if (runif(1) < 0.5) { + reward <- opt$gain[t] + } else { + reward <- opt$loss[t] + } + } + + # Record probabilities, choice and reward + probs_history[t, ] <- probs + choices_history[t] <- choice + rewards_history[t] <- reward + + # Select learning rate + if (reward >= 0) { + alpha_used <- alpha_gain_vec[choice] + } else { + alpha_used <- alpha_loss_vec[choice] + } + + # Q update + prediction_error <- reward - Q_values[choice] + Q_values[choice] <- Q_values[choice] + alpha_used * prediction_error + + # Forgetting for non-chosen arms + not_chosen <- setdiff(seq_len(n_arms), choice) + Q_values[not_chosen] <- Q_values[not_chosen] * (1 - forget_vec[not_chosen]) + + # Save Q history (after update) + Q_values_history[t, ] <- Q_values + } + + # Convert histories to data.frame for output + choices_df <- tibble::tibble( + trial = seq_len(n_choices), + choice = choices_history, + reward = rewards_history + ) + + probs_df <- as.data.frame(probs_history) + probs_df$trial <- seq_len(n_choices) + + Q_history_df <- as.data.frame(Q_values_history) + Q_history_df$trial <- seq_len(n_choices) + + # Calcul de la proportion cumulée des choix pour chaque option au cours du temps + proportions_data <- data.frame( + Iteration = 1:n_choices, + Antifragile = cumsum(choices_history == 1) / 1:n_choices, + Robust = cumsum(choices_history == 2) / 1:n_choices, + Fragil = cumsum(choices_history == 3) / 1:n_choices, + Vulnerable = cumsum(choices_history == 4) / 1:n_choices + ) + + result <- list( + model = model_name, + params = params, + choices = choices_df, + probs = probs_df, + Q_history = Q_history_df, + proportions_data = proportions_data + ) + + return(result) +} + +simulation_homogeneous_RL <- function(n_choices, options, alpha, forget, lambda) { + # Preparing the param list for the simulation runner + params <- list( + alphas = c("alpha" = alpha), + forgets = c("forget" = forget), + lambdas = c("lambda" = lambda) + ) + + results <- simulation_runner_RL(n_choices = n_choices, options = options, params = params, model_name = "HOMOGENEOUS") + return(results) +} + +simulation_gain_loss_RL <- function(n_choices, options, alpha_loss, alpha_gain, forget, lambda) { + params <- list( + alphas = c("alpha_loss" = alpha_loss, "alpha_gain" = alpha_gain), + forgets = c("forget" = forget), + lambdas = c("lambda" = lambda) + ) + results <- simulation_runner_RL(n_choices = n_choices, options = options, params = params, model_name = "GAIN_LOSS") + return(results) +} + +simulation_biased_RL <- function(n_choices, options, alpha_loss, alpha_gain, forgets_vec, lambdas_vec) { + params <- list( + alphas = c("alpha_loss" = alpha_loss, "alpha_gain" = alpha_gain), + forgets = lambdas_vec, # here user may pass full vector as forgets_vec + lambdas = lambdas_vec + ) + results <- simulation_runner_RL(n_choices = n_choices, options = options, params = params, model_name = "BIASED") + return(results) +} + +simulation_ree_biased_simple_RL <- function( + n_choices, + options, + alpha_l, alpha_g, + rho_BS, rho_JP, + forget, lambda) { + # Preparing the param list for the simulation runner + params <- list( + alphas = c("alpha_loss" = alpha_l, "alpha_gain" = alpha_g), + forgets = c("forget" = forget), + lambdas = c("lambda" = lambda), + rhos = c("rho_BS" = rho_BS, "rho_JP" = rho_JP) + ) + + results <- simulation_runner_RL(n_choices = n_choices, options = options, params = params, model_name = "REE_BIASED_SIMPLE") + return(results) +} + +simulation_ree_learning_simple_RL <- function(n_choices, options, alpha1, alpha2, alpha3, alpha4, forget, lambda) { + params <- list( + alphas = c(alpha1, alpha2, alpha3, alpha4), + forgets = c("forget" = forget), + lambdas = c("lambda" = lambda) + ) + results <- simulation_runner_RL(n_choices = n_choices, options = options, params = params, model_name = "REE_LEARNING_SIMPLE") + return(results) +} + +simulation_ree_learning_biased_simple_RL <- function(n_choices, options, alpha1, alpha2, alpha3, alpha4, forget, lambda, rho_BS, rho_JP) { + params <- list( + alphas = c(alpha1, alpha2, alpha3, alpha4), + forgets = c("forget" = forget), + lambdas = c("lambda" = lambda), + rhos = c("rho_BS" = rho_BS, "rho_JP" = rho_JP) + ) + results <- simulation_runner_RL(n_choices = n_choices, options = options, params = params, model_name = "REE_LEARNING_BIASED_SIMPLE") + return(results) +} + simulation_agentRL <- function(alpha_g, alpha_l, lambda_g, lambda_l, fg, fl, n_choices, options) { # Initialisation des Q-values pour chaque option (gains et pertes séparés) Q1_gain <- 0 @@ -196,45 +445,83 @@ simulation_agentRL <- function(alpha_g, alpha_l, lambda_g, lambda_l, fg, fl, n_c return(result) } +compute_TSREE <- function(proportions_data) { + TSREE <- 1 + proportions_data$Antifragile - proportions_data$Fragil + return(TSREE) +} + +compute_OSSREE <- function(proportions_data) { + OSSREE <- proportions_data$Vulnerable - proportions_data$Robust + return(OSSREE) +} + +plot_TSREE_OSSREE <- function(proportions_data) { + OSSREE <- compute_OSSREE(proportions_data) + TSREE <- compute_TSREE(proportions_data) + plot(OSSREE, TSREE, + col = "darkblue", cex = 2, xlim = c(-1, 1), ylim = c(0, 2), type = "l", + xlab = "OSSREE", ylab = "TSREE", main = "Evolution of TSREE and OSSREE over trials" + ) + lines(c(0, 1, 0, -1, 0), c(0, 1, 2, 1, 0)) + lines(c(0, 0), c(0, 2), lty = 2) + lines(c(-1, 1), c(1, 1), lty = 2) +} + +plot_mean_TSREE_OSSREE_one_agent <- function(proportions_data) { + OSSREE <- compute_OSSREE(proportions_data) + TSREE <- compute_TSREE(proportions_data) + mean_OSSREE <- mean(OSSREE) + mean_TSREE <- mean(TSREE) + plot(mean_OSSREE, mean_TSREE, + col = "purple", cex = 2, pch = "+", xlim = c(-1, 1), ylim = c(0, 2), type = "p", + xlab = "OSSREE", ylab = "TSREE", main = "Mean TSREE and OSSREE over trials" + ) + lines(c(0, 1, 0, -1, 0), c(0, 1, 2, 1, 0)) + lines(c(0, 0), c(0, 2), lty = 2) + lines(c(-1, 1), c(1, 1), lty = 2) +} + +plot_choices_proportions <- function(proportions_data) { + # Conversion des données pour ggplot + proportions_long <- reshape2::melt(proportions_data, + id.vars = "Iteration", + variable.name = "Option", value.name = "Proportion" + ) + + # Tracé du graphique + ggplot(proportions_long, aes(x = Iteration, y = Proportion, color = Option)) + + geom_line(size = 1.2) + + labs( + title = "Proportion of simulated choices through trials", + x = "trials", + y = "Proportion of choices" + ) + + scale_color_manual(values = c( + "Antifragile" = "blue", "Robust" = "red", + "Fragil" = "green", "Vulnerable" = "purple" + )) + + theme_minimal() + + theme(legend.title = element_blank()) +} + #### pour un agent RL -result <- simulation_agentRL(alpha_g, alpha_l, lambda_g, lambda_l, fg, fl, n_choices, options) -proportions_data <- result$proportions_data -# Conversion des données pour ggplot -proportions_long <- reshape2::melt(proportions_data, - id.vars = "Iteration", - variable.name = "Option", value.name = "Proportion" +result <- simulation_ree_learning_biased_simple_RL( + n_choices = n_choices, + options = options, alpha1 = 0.5, alpha2 = 0.5, alpha3 = 0.5, alpha4 = 0.5, forget = 0.1, lambda = 1, rho_BS = 0, rho_JP = 0 ) -# Tracé du graphique -ggplot(proportions_long, aes(x = Iteration, y = Proportion, color = Option)) + - geom_line(size = 1.2) + - labs( - title = "Proportion of simulated choices through trials", - x = "trials", - y = "Proportion of choices" - ) + - scale_color_manual(values = c( - "Antifragile" = "blue", "Robust" = "red", - "Fragil" = "green", "Vulnerable" = "purple" - )) + - theme_minimal() + - theme(legend.title = element_blank()) +proportions_data <- result$proportions_data +plot_TSREE_OSSREE(proportions_data) +plot_mean_TSREE_OSSREE_one_agent(proportions_data) +plot_choices_proportions(proportions_data) + ### pour plusieurs agents RL -n_agent <- 100 +n_agent <- 1000 result_multi_agent <- lapply(1:n_agent, function(i) { - # Paramètres d'apprentissage (indépendants pour chaque option) - alpha_g <- rep(0.9, 4) # Taux d'apprentissage pour les gains (pour chaque option) - alpha_l <- rep(0.9, 4) # Taux d'apprentissage pour les pertes (pour chaque option) - lambda_g <- rep(1, 4) # Poids pour les gains (individuel pour chaque option) - lambda_l <- rep(1, 4) # Poids pour les pertes (individuel pour chaque option) - fg <- rep(0.9, 4) # Facteurs d'oubli gains (spécifique pour chaque option) remplace les alpha pour les options non choisi - fl <- rep(0.9, 4) # Facteurs d'oubli pertes (spécifique pour chaque option) remplace les alpha pour les options non choisi - - n_choices <- 1000 # Nombre total de choix - + n_choices <- 400 # Nombre total de choix # Paramètres des options (récompenses) options <- list( option1 = list( @@ -258,10 +545,12 @@ result_multi_agent <- lapply(1:n_agent, function(i) { jp = 3000, bs = -3000, p_jp = 0.05, p_bs = 0.05 ) ) - - result <- simulation_agentRL(alpha_g, alpha_l, lambda_g, lambda_l, fg, fl, n_choices, options) - result$parameters <- list(alpha_g = alpha_g, alpha_l = alpha_l, lambda_g = lambda_g, lambda_l = lambda_l, fg = fg, fl = fl) - result$option <- options + result <- simulation_ree_learning_biased_simple_RL( + n_choices = n_choices, + options = options, + alpha1 = 0.5, alpha2 = 0.5, alpha3 = 0.5, alpha4 = 0.5, + forget = 0.2, lambda = 2, rho_BS = -1, rho_JP = 1 + ) return(result) })