mirror of
https://app-learninglab.inria.fr/moocrr/gitlab/da84ababf0696af51bddad556af86353/mooc-rr.git
synced 2026-06-17 09:35:24 +02:00
116 lines
3.4 KiB
Org Mode
116 lines
3.4 KiB
Org Mode
#+TITLE: Analyse du journal
|
|
#+AUTHOR: Louis Lacoste
|
|
#+DATE: 2022-11-20
|
|
#+LANGUAGE: fr
|
|
# #+PROPERTY: header-args :eval never-export
|
|
|
|
#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="http://www.pirilampo.org/styles/readtheorg/css/htmlize.css"/>
|
|
#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="http://www.pirilampo.org/styles/readtheorg/css/readtheorg.css"/>
|
|
#+HTML_HEAD: <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.3/jquery.min.js"></script>
|
|
#+HTML_HEAD: <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script>
|
|
#+HTML_HEAD: <script type="text/javascript" src="http://www.pirilampo.org/styles/lib/js/jquery.stickytableheaders.js"></script>
|
|
#+HTML_HEAD: <script type="text/javascript" src="http://www.pirilampo.org/styles/readtheorg/js/readtheorg.js"></script>
|
|
|
|
* Récupération des données du journal
|
|
|
|
Ici nous allons extraire les étiquettes du journal (=~/org/journal.org=)
et les enregistrer dans un fichier =data.csv=, à raison d'une étiquette par ligne.
|
|
|
|
#+begin_src shell :results output :exports both
|
|
# Pull out every run of ASCII letters that sits between two colons in the
# journal, one match per line, and store the result in data.csv.
# NOTE(review): this pattern is not limited to headline tags -- the recorded
# results also contain drawer keywords such as LOGBOOK and END.
grep --only-matching --perl-regexp "(?<=:)([a-zA-Z]*)(?=:)" ~/org/journal.org > data.csv

# Preview the first five extracted entries.
head -n 5 data.csv
|
|
#+end_src
|
|
|
|
#+RESULTS:
|
|
: informatique
|
|
: wikipedia
|
|
: biologie
|
|
: virus
|
|
: allergie
|
|
|
|
* Traitement des données
|
|
|
|
#+NAME: import-python
|
|
#+begin_src python :results output :session :exports both :dir /home/polarolouis/Documents/MOOC/RR/mooc-rr/module2/exo4
|
|
import csv
from collections import Counter

# Load the first column of every row of data.csv (one tag per line).
# - newline='' lets the csv module handle line endings itself, as its
#   documentation recommends for files passed to csv.reader.
# - csv.reader yields an empty list for a blank line; those rows are skipped
#   so that row[0] cannot raise IndexError.
with open('data.csv', 'r', encoding='utf8', newline='') as csvfile:
    temporaryList = [row[0] for row in csv.reader(csvfile) if row]

# Number of occurrences of each tag, keyed in order of first appearance.
tagCount = Counter(temporaryList)

# Parallel lists: tagList[i] is a tag and countList[i] its number of
# occurrences, both in the iteration order of tagCount.
tagList = list(tagCount)
countList = list(tagCount.values())
|
|
|
|
#+end_src
|
|
|
|
Ici, on convertit les données en un dataframe =pandas= afin de pouvoir
les afficher plus facilement.
|
|
|
|
#+RESULTS: import-python
|
|
|
|
#+NAME: conversion-dataframe
|
|
#+begin_src python :results output :session :exports both
|
|
import pandas as pd

# Plain-dict copy of the tag counter, printed so the raw counts appear in
# the exported results.
preDataframe = dict(tagCount)
print(preDataframe)

# Two-column table: one row per tag, with its number of occurrences.
tagCountDataframe = pd.DataFrame.from_dict(
    {
        'tags': [*preDataframe.keys()],
        'values': [*preDataframe.values()],
    }
)

# Make sure the counts column has a numeric dtype before it is plotted.
tagCountDataframe['values'] = pd.to_numeric(tagCountDataframe['values'])

print(tagCountDataframe)
|
|
#+end_src
|
|
|
|
#+RESULTS: conversion-dataframe
|
|
#+begin_example
|
|
{'informatique': 2, 'wikipedia': 1, 'biologie': 2, 'virus': 1, 'allergie': 1, 'noexport': 2, 'LOGBOOK': 2, 'END': 2, 'mooc': 1, 'science': 1, 'Epistemology': 1}
|
|
tags values
|
|
0 informatique 2
|
|
1 wikipedia 1
|
|
2 biologie 2
|
|
3 virus 1
|
|
4 allergie 1
|
|
5 noexport 2
|
|
6 LOGBOOK 2
|
|
7 END 2
|
|
8 mooc 1
|
|
9 science 1
|
|
10 Epistemology 1
|
|
#+end_example
|
|
|
|
* Affichage des données
|
|
** Diverses infos
|
|
#+begin_src python :results output :session :exports both
|
|
# Print the three most frequent tags as (tag, count) pairs.
# Counter.most_common keeps equally frequent tags in first-seen order, so
# when more than three tags share the top count only the earliest are shown.
print(f"Les tags les plus cités : {tagCount.most_common(3)}")
|
|
#+end_src
|
|
|
|
#+RESULTS:
|
|
: Les tags les plus cités : [('informatique', 2), ('biologie', 2), ('noexport', 2)]
|
|
** Graphiques
|
|
#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
|
|
import matplotlib.pyplot as plt

# Create the figure and its axes explicitly and hand the axes to pandas.
# Without ax=, DataFrame.plot opens a figure of its own, so a figure created
# beforehand with plt.figure(figsize=...) stays empty and its size is never
# applied to the saved chart.
fig, ax = plt.subplots(figsize=(10, 5))

# Bar chart: one bar per tag, bar height = number of occurrences.
tagCountDataframe.plot(x="tags", y="values", kind='bar', ax=ax)

# Adjust the layout only after the bars and tick labels exist; calling it on
# an empty figure has nothing to adjust.
fig.tight_layout()

# matplot_lib_filename is injected by the block's :var header argument.
fig.savefig(matplot_lib_filename)

# Last expression of the block: with ":results file" in session mode this
# value is what Org inserts as the file link (see the recorded result).
matplot_lib_filename
|
|
|
|
#+end_src
|
|
|
|
#+RESULTS:
|
|
[[file:/tmp/babel-Eb8JSG/figure6P1Fep.png]]
|