diff --git a/README.md b/README.md
index ff66c43..f953097 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,11 @@
 # README
 
-A simple covid plotter. Data are provided by : https://github.com/owid/covid-19-data/tree/master/public/data
+A simple covid plotter.
+
+Data are provided by :
+* https://github.com/owid/covid-19-data/tree/master/public/data
+* https://data.drees.solidarites-sante.gouv.fr/api/records/1.0/search/?dataset=covid-19-resultats-par-age-issus-des-appariements-entre-si-vic-si-dep-et-vac-si&q=&rows=-1&facet=date&facet=vac_statut&facet=age
+
 
 ## Installation
 
@@ -11,8 +16,12 @@ pip install -r requirements.txt
 ## Launch
 
 ```bash
-python main.py
-# Ex : python main.py FRA new_cases_smoothed new_deaths_smoothed people_fully_vaccinated hosp_patients
+# owid dataset
+python owid.py
+# Ex : python owid.py FRA new_cases_smoothed new_deaths_smoothed people_fully_vaccinated hosp_patients
+
+# drees dataset
+python drees.py
 ```
 
 **NOTE** : `-r` option is used to fetch update data
\ No newline at end of file
diff --git a/drees.py b/drees.py
new file mode 100644
index 0000000..0584831
--- /dev/null
+++ b/drees.py
@@ -0,0 +1,211 @@
+"""Plot DREES covid-19 indicators by age group and vaccine status."""
+import argparse
+import datetime
+import json
+import logging
+import os
+from collections import OrderedDict
+from enum import Enum
+from typing import Any, Dict, List, Optional
+
+import requests
+from matplotlib import dates as md
+from matplotlib import pyplot as plt
+
+FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
+logging.basicConfig(format=FORMAT, level=logging.INFO)
+
+DATA_URL = "https://data.drees.solidarites-sante.gouv.fr/api/records/1.0/search/?dataset=covid-19-resultats-par-age-issus-des-appariements-entre-si-vic-si-dep-et-vac-si&q=&rows=-1&facet=date&facet=vac_statut&facet=age"
+DATA_REPOSITORY = "data"
+OUTPUT_REPOSITORY = "output"
+# seconds to wait for the server before giving up on a download
+REQUEST_TIMEOUT = 60
+
+
+class Field(str, Enum):
+    """Indicator names read from the ``fields`` of each record"""
+
+    HC = "hc"  # plotted as "hospitalisation"
+    SC = "sc"  # plotted as "soin_critique"
+    DC = "dc"  # plotted as "deces"
+    EFF = "effectif"
+
+
+class VacStatus(str, Enum):
+    """Vaccine status labels, matched against ``vac_statut`` of the records"""
+
+    NC = "Non-vaccinés"
+    PDR = "Primo dose récente"
+    PDE = "Primo dose efficace"
+    CM3MSR = "Complet de moins de 3 mois - sans rappel"
+    CM3MAR = "Complet de moins de 3 mois - avec rappel"
+    CM36MSR = "Complet entre 3 mois et 6 mois - sans rappel"
+    CM36MAR = "Complet entre 3 mois et 6 mois - avec rappel"
+
+
+class AgeGroup(str, Enum):
+    """Age group labels, matched against ``age`` of the records"""
+
+    VERY_YOUNG = "[0,19]"
+    YOUNG = "[20,39]"
+    YONG = "[20,39]"  # former misspelled name, kept as an alias of YOUNG
+    MID_OLD = "[40,59]"
+    OLD = "[60,79]"
+    VERY_OLD = "[80;+]"
+
+
+def get_data(
+    file_path: Optional[str] = None,
+    extension: Optional[str] = "json",
+    refresh: bool = False,
+) -> Dict[str, Any]:
+    """
+    Collect covid data by age from DREES
+
+    The payload is cached in ``file_path`` and downloaded again only when that
+    file is missing or ``refresh`` is set.
+
+    :param file_path: cache file, defaults to a file of DATA_REPOSITORY named
+        after the last segment of the url
+    :param extension: value for an ``{extension}`` placeholder in DATA_URL
+        (the current url has none, so it has no effect)
+    :param refresh: download the data even if the cache file exists
+    :return: the decoded json payload
+    :raises requests.HTTPError: if the server answers with an error status
+    :raises ValueError: if the answer is empty or is not valid json
+    """
+    os.makedirs(DATA_REPOSITORY, exist_ok=True)
+    data_url = DATA_URL.format(extension=extension)
+    if data_url.endswith("/"):
+        data_url = data_url[:-1]
+    if file_path is None:
+        file_path = os.path.join(DATA_REPOSITORY, data_url.split("/")[-1])
+    if refresh or not os.path.isfile(file_path):
+        logging.info("downloading %s", data_url)
+        r = requests.get(data_url, timeout=REQUEST_TIMEOUT)
+        r.raise_for_status()
+        if not r.content:
+            raise ValueError("no data provided from the url : {}".format(data_url))
+        # decode before writing so that an invalid payload is never cached
+        data = json.loads(r.content)
+        with open(file_path, "wb") as f:
+            f.write(r.content)
+        return data
+    with open(file_path, "rb") as f:
+        return json.load(f)
+
+
+def group_by_age_date(data: Dict[str, Any], fields: List[str]) -> Dict[str, Any]:
+    """
+    Group the original dictionary into a more readable one
+    'date': {
+        'age' : {
+            'vac_status' : {
+                'hc',
+                'sc',
+                'dc',
+                ...
+            }
+        }
+    }
+
+    :param data: payload whose ``records`` each hold a ``fields`` mapping
+    :param fields: names of the values to copy from each record
+    :return: the values indexed by date, then age, then vaccine status
+    :raises KeyError: if a record lacks ``date``, ``age``, ``vac_statut`` or
+        one of ``fields``
+    """
+    dic_data_grouped: Dict[str, Any] = OrderedDict()
+    for row in data["records"]:
+        row_fields = row["fields"]
+        dic_status = (
+            dic_data_grouped.setdefault(row_fields["date"], OrderedDict())
+            .setdefault(row_fields["age"], OrderedDict())
+            .setdefault(row_fields["vac_statut"], OrderedDict())
+        )
+        for field in fields:
+            dic_status[field] = row_fields[field]
+    return dic_data_grouped
+
+
+def plot(dic_data_grouped: Dict[str, Any], age: str, vac_status: str) -> None:
+    """
+    Plot data by vaccine status and age
+
+    One curve per indicator (hospitalisation, critical care, death) is saved
+    as a pdf in OUTPUT_REPOSITORY.
+
+    :param dic_data_grouped: output of group_by_age_date, its keys must be
+        ISO dates (YYYY-MM-DD)
+    :param age: age group label (or AgeGroup member)
+    :param vac_status: vaccine status label (or VacStatus member)
+    :raises KeyError: if a date has no entry for this age and vaccine status
+    :raises ValueError: if a date is not an ISO date
+    """
+    # f-strings render enum members as "AgeGroup.YOUNG" on python >= 3.12:
+    # always use the plain label for the title and the file name
+    if isinstance(age, Enum):
+        age = age.value
+    if isinstance(vac_status, Enum):
+        vac_status = vac_status.value
+    x: List[datetime.date] = []
+    hc: List[float] = []
+    sc: List[float] = []
+    dc: List[float] = []
+    # do not rely on the order of the records: ISO dates sort chronologically
+    for day in sorted(dic_data_grouped):
+        dic_fields = dic_data_grouped[day][age][vac_status]
+        # real dates (not strings) are what MonthLocator expects on the axis
+        x.append(datetime.date.fromisoformat(day))
+        hc.append(dic_fields[Field.HC.value])
+        sc.append(dic_fields[Field.SC.value])
+        dc.append(dic_fields[Field.DC.value])
+    fig, ax = plt.subplots()
+    ax.plot(x, hc, label="hospitalisation")
+    ax.plot(x, sc, label="soin_critique")
+    ax.plot(x, dc, label="deces")
+    ax.set_xlabel("date")
+    ax.set_ylabel("nombre")
+    ax.set_title(f"{age}ans - {vac_status}")
+    ax.grid(True)
+    ax.xaxis.set_major_locator(md.MonthLocator())
+    fig.autofmt_xdate()
+    ax.legend()
+    os.makedirs(OUTPUT_REPOSITORY, exist_ok=True)
+    fig.savefig(os.path.join(OUTPUT_REPOSITORY, f"{age}_{vac_status}.pdf"))
+    # release the figure, pyplot keeps it alive otherwise
+    plt.close(fig)
+
+
+def main() -> None:
+    """
+    This script aims to plot DREES data with vaccine status and ages grouped
+    Main indicators are :
+    - hospitalisations
+    - criticals
+    - deaths
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-r",
+        "--refresh",
+        action="store_true",
+        default=False,
+        help="redownload data for updates",
+    )
+    args = parser.parse_args()
+
+    dic_data: Dict[str, Any] = get_data(
+        # file name kept as is so that an already downloaded file is reused
+        file_path=os.path.join(DATA_REPOSITORY, "dress.json"),
+        refresh=args.refresh,
+    )
+    dic_data_grouped: Dict[str, Any] = group_by_age_date(
+        dic_data, [x.value for x in Field]
+    )
+    plot(dic_data_grouped, AgeGroup.YOUNG.value, VacStatus.NC.value)
+    plot(dic_data_grouped, AgeGroup.YOUNG.value, VacStatus.CM3MSR.value)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/main.py b/owid.py
similarity index 100%
rename from main.py
rename to owid.py