Init
This commit is contained in:
commit
94b4e8f836
4 changed files with 290 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
data/
|
||||
4
.lintr
Normal file
4
.lintr
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
linters: linters_with_tags(tags = c("readability", "best_practices",
|
||||
"common_mistakes"),
|
||||
indentation_linter(indent = 4L)) # see vignette("lintr")
|
||||
encoding: "UTF-8"
|
||||
151
APT_PARCOURS.R
Normal file
151
APT_PARCOURS.R
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
library(dplyr)
|
||||
|
||||
|
||||
# Load the data (two columns are used).
# NOTE(review): the path is relative to the current working directory.
df_APT <- read.csv(
    "Downloads/Suppl.csv",
    sep = ";",
    encoding = "UTF-8",
    # header = TRUE,
    # The original passed `na.strings = T`. `T` is the logical TRUE, which
    # ends up as the string "TRUE" when used as an NA marker; it is spelled
    # out here so the call no longer depends on the reassignable `T`.
    # NOTE(review): confirm that "TRUE" is really the intended NA marker.
    na.strings = "TRUE",
    dec = ","
)
|
||||
|
||||
|
||||
## Enlever les lignes et colonnes vides
|
||||
|
||||
#' Drop the rows whose `parcelle_id` is an empty string.
#'
#' @param df A data frame with a `parcelle_id` column.
#' @return `df` without the rows where `parcelle_id` is "". Rows where it is
#'   `NA` are dropped as well, since `filter()` only keeps `TRUE` rows.
fun_enlever_lignes_vides <- function(df) {
    # `.data$` refers to the column of the data being filtered (so it also
    # works on grouped data), and the result is returned visibly instead of
    # through a trailing assignment.
    filter(df, .data$parcelle_id != "")
}
|
||||
|
||||
# Keep the rows that have at least one cell that is neither NA nor "",
# restricted to the first two columns.
df_APT <- df_APT[rowSums(!(is.na(df_APT) | df_APT == "")) > 0, 1:2]
# Drop the rows whose second column contains a digit.
df_APT <- df_APT[!grepl("\\d", df_APT[[2]]), ]
|
||||
|
||||
|
||||
## Remove the accents

# iconv() is called with sub = "", so anything that cannot be converted to
# ASCII is dropped from the string rather than replaced by a plain letter.
df_APT_sans_accents <- df_APT
df_APT_sans_accents[1:2] <- lapply(
    df_APT[1:2],
    function(colonne) iconv(colonne, from = "UTF-8", to = "ASCII", sub = "")
)
df_APT <- df_APT_sans_accents
|
||||
|
||||
|
||||
|
||||
## Copy the names down column 2 until a new name is found

# seq_len(n)[-1] is empty when there are fewer than two rows, whereas
# 2:nrow(df_APT) would count down and index rows that do not exist.
for (i in seq_len(nrow(df_APT))[-1L]) {
    # isTRUE() makes an NA cell count as "not empty" instead of aborting the
    # `if` with a missing-value error.
    if (isTRUE(df_APT[i, 2] == "")) {
        df_APT[i, 2] <- df_APT[i - 1L, 2]
    }
}
|
||||
|
||||
|
||||
## Identify the apprentices and build two data frames (apprentices and the
## others, "initiaux") because they differ

# Values of column X that mark a whole X.1 group as "apprenti". The second
# one covers the case of the 2A "false apprentices". Both are written the way
# they appear in the data, i.e. without their accented letters.
marqueurs_apprenti <- c(
    "Accompagnement des apprentis",
    "Priodes d'apprentissage en entreprise, semestre 1"
)

# A group is "apprenti" as soon as one of its rows carries a marker. The
# condition is a scalar per group, so a plain if/else is used (the original
# scalar ifelse() only worked by recycling the first element of the column).
# ungroup() keeps the grouping from leaking into the rest of the script.
df_APT <- df_APT %>%
    group_by(X.1) %>%
    mutate(
        apprenti = if (any(X %in% marqueurs_apprenti)) {
            "apprenti"
        } else {
            "non-apprenti"
        }
    ) %>%
    ungroup()

df_APT_apprenti <- df_APT[df_APT$apprenti == "apprenti", ]
df_APT_initiaux <- df_APT[df_APT$apprenti != "apprenti", ]
|
||||
|
||||
|
||||
|
||||
|
||||
# Terms to exclude (written without their accented letters)
termes_a_exclure <- c(
    "Ing - 1A",
    "Intitul du cours",
    "Approche professionnelle et sectorielle",
    "Approches des domaines",
    "Enjeux et dfis des sciences et technologies du vivant et de l'environnement",
    "Communication",
    "La recherche d'informations en sciences du vivant"
)

# Keep the rows whose column "X" is not one of the terms above
df_final <- filter(df_APT_initiaux, !(X %in% termes_a_exclure))

# Show the final result
print(df_final)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#' Remove the rows located right above each occurrence of a marker.
#'
#' @param data A data frame; the marker is looked up in its first column.
#' @param marqueur Value to find (exact match) in the first column.
#' @param n_avant Number of rows to remove above each match.
#' @return `data` without those rows, or `data` unchanged when nothing has to
#'   be removed.
enlever_lignes_avant <- function(data, marqueur, n_avant) {
    index_marqueur <- which(data[[1]] == marqueur)
    a_enlever <- unlist(lapply(index_marqueur, function(i) {
        seq(i - n_avant, i - 1L)
    }))
    # A marker closer than `n_avant` rows to the top yields row numbers below
    # 1; they are discarded because they cannot be used in a negative index.
    a_enlever <- unique(a_enlever[a_enlever >= 1L])
    # With nothing to remove, negative indexing must be skipped: an empty
    # index selects zero rows instead of keeping them all.
    if (length(a_enlever) == 0L) {
        return(data)
    }
    data[-a_enlever, ]
}

## 1A "Approches des domaines": drop the 4 rows above each occurrence
df_APT_final <- enlever_lignes_avant(
    df_APT_initiaux, "Approches des domaines", 4L
)

## 1A "MODULE INTEGRATIF": drop the 12 rows above each occurrence
df_APT_final <- enlever_lignes_avant(df_APT_final, "MODULE INTEGRATIF", 12L)

## DOMINANTE: drop the 44 rows above each "Ing - 2A"
df_APT_final <- enlever_lignes_avant(df_APT_final, "Ing - 2A", 44L)

## 2A elective units: drop the 34 rows above each "UE choix Semestre 1"
df_APT_final <- enlever_lignes_avant(df_APT_final, "UE choix Semestre 1", 34L)

## 3A specialisation: drop the 3 rows above each "Ing - 3A"
# The result of this last step is stored in `test`, not in `df_APT_final`.
test <- enlever_lignes_avant(df_APT_final, "Ing - 3A", 3L)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
134
extract_data.R
Normal file
134
extract_data.R
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
library(readxl)
|
||||
library(dplyr)
|
||||
library(tidyr)
|
||||
|
||||
# Supplement_diplome-1.xls line 4
ligne_prenom <- 5
colonne_prenom <- 50

folder_path <- file.path("data", "Bulletins promotion 2023")

# Read the first sheet of every file of the folder, without using its first
# row as column names, and stack the sheets that have exactly 63 columns.
# Sheets with another width contribute NULL, which rbind() ignores.
df <- do.call("rbind", lapply(
    list.files(folder_path, full.names = TRUE),
    function(chemin) {
        feuille <- as.data.frame(
            read_excel(chemin, sheet = 1L, col_names = FALSE)
        )
        if (ncol(feuille) != 63L) {
            return(NULL)
        }
        feuille
    }
))
|
||||
|
||||
|
||||
# (row, col) positions of the cells of `tableau` equal to `valeur`
positions_de <- function(tableau, valeur) {
    which(tableau == valeur, arr.ind = TRUE)
}

# Positions of the "Numéro INE" labels, and the (row, col) offset from a
# label to the cell read as the INE value.
indices_numero_ine <- positions_de(df, "Numéro INE")
decalage_ine <- c(1, 6)

# Positions of "Total Ects validés" (the value is not stored)
positions_de(df, "Total Ects validés")

# Positions of the "Ing - 1A" / "Ing - 2A" / "Ing - 3A" labels
indices_cours_Ing_1A <- positions_de(df, "Ing - 1A")
indices_cours_Ing_2A <- positions_de(df, "Ing - 2A")
indices_cours_Ing_3A <- positions_de(df, "Ing - 3A")

# The end-of-studies internship appears under two different titles
indices_stage_fin_etude <- which(
    (df == "Stage de fin d'études") |
        (df == "Stage de fin d'études de 3ème année"),
    arr.ind = TRUE
)

col_ECTS <- 63

# Offsets

dec_col_cours_ine <- -23
dec_row_cours_ine <- 35

dec_col_annee <- -13

dec_row_parcours_ing1A <- -4
dec_col_parcours_ing1A <- 2

dec_ECTS <- 42
|
||||
|
||||
#' Positions of the entries of `cours` that are not actual courses.
#'
#' @param cours Vector of values taken from the course column.
#' @return Integer positions of the entries that are `NA`, equal to
#'   "Intitulé du cours", or equal to "Ing - 1A", "Ing - 2A" or "Ing - 3A".
get_row_to_remove_cours <- function(cours) {
    lignes_hors_cours <- c(
        "Intitulé du cours", "Ing - 1A", "Ing - 2A", "Ing - 3A"
    )
    which(is.na(cours) | cours %in% lignes_hors_cours)
}
|
||||
|
||||
|
||||
# Build one long table with a row per course line, for every "Numéro INE"
# label found in `df`; the per-label tables are stacked with rbind().
# Columns of the result: ine, cours, ects, annee, type_annee, bloc.
full <- do.call("rbind", lapply(
    seq_len(nrow(indices_numero_ine)),
    function(idx) {
        # Position of the INE label
        current_row <- indices_numero_ine[idx, 1]
        current_col <- indices_numero_ine[idx, 2]

        # For the last label, go to the end of the table; the rows read past
        # it are cleaned up afterwards. The condition is a scalar, so a plain
        # if/else is used rather than the vectorised ifelse().
        next_row <- if (idx != nrow(indices_numero_ine)) {
            indices_numero_ine[idx + 1, 1]
        } else {
            nrow(df)
        }

        ine <- df[[
            current_row + decalage_ine[[1]],
            current_col + decalage_ine[[2]]
        ]]

        # Rows of `df` read for this label (computed once, used three times)
        lignes_section <- seq(
            current_row + dec_row_cours_ine,
            next_row + dec_row_cours_ine - 1
        )
        cours <- df[lignes_section, current_col + dec_col_cours_ine]
        ects <- df[lignes_section, col_ECTS]

        longdata <- data.frame(ine = ine, cours = cours, ects = ects)
        # Drop the lines where both the course and the ECTS cells are NA
        lignes_vides <- is.na(longdata[["cours"]]) & is.na(longdata[["ects"]])
        longdata <- longdata[!lignes_vides, ]

        # Drop the header lines. The length guards matter: x[-integer(0), ]
        # would select zero rows instead of keeping everything.
        row_remove_intit_cours <- which(
            longdata[["cours"]] == "Intitulé du cours" &
                longdata[["ects"]] == "Ects"
        )
        if (length(row_remove_intit_cours) > 0L) {
            longdata <- longdata[-row_remove_intit_cours, ]
        }
        row_remove_ing <- which(longdata[["cours"]] == "Ing")
        if (length(row_remove_ing) > 0L) {
            longdata <- longdata[-row_remove_ing, ]
        }

        # Each "Ing - 1A/2A/3A" line opens a segment that runs to the next
        # such line (or to the last row); the last row closes the list.
        id_cycle_ing <- grep("Ing - [1-3]A", longdata[["cours"]])
        id_cycle_ing <- c(id_cycle_ing, nrow(longdata))
        # Academic years ("20xx-xx") found in column 8 of the same rows
        annee_cycle_ing <- grep(
            "20[0-9][0-9]-[0-9][0-9]",
            df[lignes_section, 8],
            value = TRUE
        )
        type_annee <- rep("Unknown", nrow(longdata))
        annee <- rep("", nrow(longdata))
        # A dedicated loop variable: the original reused `idx` here
        for (id_segment in seq_len(length(id_cycle_ing) - 1L)) {
            debut <- id_cycle_ing[id_segment]
            lignes_segment <- seq(debut, id_cycle_ing[id_segment + 1L])
            type_annee[lignes_segment] <- longdata[["cours"]][debut]
            annee[lignes_segment] <- annee_cycle_ing[id_segment]
        }
        longdata[["annee"]] <- annee
        longdata[["type_annee"]] <- type_annee

        # The "Ing - xA" lines themselves are no longer needed
        row_remove_annee_cycle_ing <- grep(
            "Ing - [0-9]{1}A", longdata[["cours"]]
        )
        if (length(row_remove_annee_cycle_ing) > 0L) {
            longdata <- longdata[-row_remove_annee_cycle_ing, ]
        }

        # Find the blocks: a line with a non-NA ECTS value names a block, and
        # the lines that follow it belong to that block.
        indices_blocs <- which(!is.na(longdata[["ects"]]))
        noms_blocs <- longdata[indices_blocs, "cours"]
        # rep() keeps the assignment valid when `longdata` has zero rows
        longdata[["bloc"]] <- rep(NA, nrow(longdata))
        longdata$bloc[indices_blocs] <- noms_blocs
        # Fill the missing block names downwards. seq_len(n)[-1] is empty for
        # fewer than two rows, where 2:nrow(longdata) would count down.
        for (i in seq_len(nrow(longdata))[-1L]) {
            if (is.na(longdata[["bloc"]][i])) {
                longdata[["bloc"]][i] <- longdata[["bloc"]][i - 1L]
            }
        }

        longdata
    }
))
|
||||
|
||||
|
||||
# Rows of `full` whose block name contains "UE à choix". The trailing " *" of
# the pattern matches zero or more spaces, so it does not restrict the match.
df_ue_choix <- full[grep("UE à choix *", full[["bloc"]]), ]
|
||||
Loading…
Add table
Reference in a new issue