#+TITLE: Analyse du journal
#+AUTHOR: Louis Lacoste
#+DATE: 2022-11-20
#+LANGUAGE: fr
# #+PROPERTY: header-args :eval never-export

# NOTE(review): the six HTML_HEAD values below are empty in this copy;
# confirm whether their stylesheet/script tags were lost.
#+HTML_HEAD:
#+HTML_HEAD:
#+HTML_HEAD:
#+HTML_HEAD:
#+HTML_HEAD:
#+HTML_HEAD:

* Récupération des données du journal

Ici nous allons importer les étiquettes et les exporter dans un fichier =data.csv=.

#+begin_src shell :results output :exports both
# Extract every run of ASCII letters enclosed between two colons, one per line.
# NOTE: this also captures any other :word: token in the journal, e.g. drawer
# keywords such as :LOGBOOK: and :END:, not only heading tags.
grep -oP "(?<=:)([a-zA-Z]*)(?=:)" ~/org/journal.org > data.csv
# Show the first extracted entries as a quick sanity check.
head -n 5 data.csv
#+end_src

#+RESULTS:
: informatique
: wikipedia
: biologie
: virus
: allergie

* Traitement des données

#+NAME: import-python
#+begin_src python :results output :session :exports both :dir /home/polarolouis/Documents/MOOC/RR/mooc-rr/module2/exo4
import csv
from collections import Counter

# Read the first column of every row of data.csv.
# - newline='' is the mode the csv module expects for files given to csv.reader.
# - Blank lines come back from csv.reader as empty lists; they are skipped so
#   that row[0] cannot raise IndexError.
with open('data.csv', 'r', encoding='utf8', newline='') as csvfile:
    temporaryList = [row[0] for row in csv.reader(csvfile) if row]

# Number of occurrences of each tag.
tagCount = Counter(temporaryList)

# Parallel lists of tags and their counts, in the Counter's insertion order.
tagList = list(tagCount)
countList = list(tagCount.values())
#+end_src

Ici on convertit les données en dataframe =pandas= afin de pouvoir faire l'affichage plus facilement.
#+RESULTS: import-python

#+NAME: conversion-dataframe
#+begin_src python :results output :session :exports both
import pandas as pd

# Plain-dict view of the Counter built by the import-python block.
preDataframe = dict(tagCount)
print(preDataframe)

# One row per tag: the tag name and its number of occurrences.
tagCountDataframe = pd.DataFrame.from_dict({'tags':list(preDataframe), 'values':list(preDataframe.values())})
# Make sure the counts column has a numeric dtype before plotting.
tagCountDataframe['values'] = pd.to_numeric(tagCountDataframe['values'])
print(tagCountDataframe)
#+end_src

#+RESULTS: conversion-dataframe
#+begin_example
{'informatique': 2, 'wikipedia': 1, 'biologie': 2, 'virus': 1, 'allergie': 1, 'noexport': 2, 'LOGBOOK': 2, 'END': 2, 'mooc': 1, 'science': 1, 'Epistemology': 1}
            tags  values
0   informatique       2
1      wikipedia       1
2       biologie       2
3          virus       1
4       allergie       1
5       noexport       2
6        LOGBOOK       2
7            END       2
8           mooc       1
9        science       1
10  Epistemology       1
#+end_example

* Affichage des données

** Diverses infos

#+begin_src python :results output :session :exports both
# Report the three most frequent tags with their counts.
print(f"Les tags les plus cités : {tagCount.most_common(3)}")
#+end_src

#+RESULTS:
: Les tags les plus cités : [('informatique', 2), ('biologie', 2), ('noexport', 2)]

** Graphiques

#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
import matplotlib.pyplot as plt

# Create the figure and axes up front and hand the axes to pandas: when no
# `ax` is given, DataFrame.plot() opens a figure of its own, so a figure
# created beforehand (and its figsize) would not be the one that is saved.
fig, ax = plt.subplots(figsize=(10, 5))

# Plot: one bar per tag, bar height = number of occurrences.
tagCountDataframe.plot(x="tags", y="values", kind='bar', ax=ax)

# Adjust the layout only after the bars and tick labels exist.
fig.tight_layout()
fig.savefig(matplot_lib_filename)

# Release the figure so repeated evaluations in the session do not pile up.
plt.close(fig)

# Last expression: the file path Org uses as the block's file result.
matplot_lib_filename
#+end_src

#+RESULTS:
[[file:/tmp/babel-Eb8JSG/figure6P1Fep.png]]