From 615094877ce948b3b3c6927bef851445ca7dd287 Mon Sep 17 00:00:00 2001
From: rmanach
Date: Wed, 22 Dec 2021 23:36:47 +0100
Subject: [PATCH] add bar graph plot showing vac/unvac percent indicator grouped by age

---
 drees.py | 89 +++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 80 insertions(+), 9 deletions(-)

diff --git a/drees.py b/drees.py
index 37ec0e3..675bc7b 100644
--- a/drees.py
+++ b/drees.py
@@ -8,8 +8,8 @@ from functools import partial
 from multiprocessing import Pool
 from typing import Any, Dict, List, Optional, OrderedDict, Tuple
 
-import matplotlib
 import numpy as np
+import pandas as pd
 import requests
 from matplotlib import dates as md
 from matplotlib import pyplot as plt
@@ -55,7 +55,7 @@ def get_data(
     refresh=False,
 ) -> Dict[str, Any]:
     """
-    Collect covid data by age from DREES
+    collect covid data by age from DREES
     """
     os.makedirs(DATA_REPOSITORY, exist_ok=True)
     data_url = DATA_URL.format(extension=extension)
@@ -78,7 +78,7 @@ def get_data(
 
 def group_by_age_date(data: Dict[str, Any], fields: List[str]) -> Dict[dt, Any]:
     """
-    Group the original dictionnary into a more readable one
+    group the original dictionnary into a more readable one
     'date': {
         'age' : {
             'vac_status' : {
@@ -221,11 +221,11 @@ def extract_field_values(fields: List[Dict[str, Any]], field: Field) -> np.ndarr
     return np.asarray(field_values)
 
 
-def plot_data_by_field(
+def plot_data_by_age_vac(
     dic_data_grouped: Dict[dt, Any], age: AgeGroup, vac_status: VacStatus
 ) -> None:
     """
-    Plot data by vaccine status, age and field
+    plot data by vaccine status, age and field
     """
     fig = get_plot_fig()
 
@@ -238,8 +238,74 @@ def plot_data_by_field(
     plt.ylabel("nombre")
     plt.title(f"{age}ans - {vac_status}")
 
-    save_and_close_fig(
-        fig, os.path.join(OUTPUT_REPOSITORY, f"{age}_{vac_status}_{field}.pdf")
-    )
+    save_and_close_fig(fig, os.path.join(OUTPUT_REPOSITORY, f"{age}_{vac_status}.pdf"))
+
+
+def group_by_date_age_vac(
+    dic_data_grouped: Dict[dt, Any],
+    field: Field,
+    is_vac: Optional[bool] = True,
+    limit_days: Optional[int] = 30,
+) -> Dict[str, Any]:
+    """
+    compute, for each date and age group, the percent of `field` that
+    belongs to vaccinated (or unvaccinated if `is_vac` is falsy) people
+
+    the `VacStatus.NC` status counts as unvaccinated, every other status
+    counts as vaccinated ; an age group without any data yields 0
+    dates at least `limit_days` days away from now are skipped, `None`
+    keeps every date
+    """
+    now = dt.now()
+    dic_data: Dict[str, Any] = OrderedDict()
+    for date, dic_age in dic_data_grouped.items():
+        # `limit_days` is Optional: None must disable the filter, not raise
+        if limit_days is not None and abs(date - now).days >= limit_days:
+            continue
+        dic_percent: Dict[str, Any] = OrderedDict()
+        for age, dic_vac in dic_age.items():
+            nb_vac, nb_unvac = 0, 0
+            for vac_status, dic_field in dic_vac.items():
+                if vac_status == VacStatus.NC.value:
+                    nb_unvac += dic_field.get(field.value, 0)
+                else:
+                    nb_vac += dic_field.get(field.value, 0)
+            sum_vac = nb_vac + nb_unvac
+            nb_selected = nb_vac if is_vac else nb_unvac
+            # guard the empty age group instead of catching ZeroDivisionError
+            dic_percent[age] = (nb_selected / sum_vac) * 100 if sum_vac else 0
+        dic_data[date.strftime(DATE_FORMAT)] = dic_percent
+    return dic_data
+
+
+def plot_bar_data_by_field(
+    dic_data_grouped: Dict[dt, Any], field: Field, is_vac: Optional[bool] = True
+) -> None:
+    """
+    display a bar graph by field grouped by age over the data period
+    bars display the vaccinated (or unvaccinated if `is_vac` is falsy) percent
+    the graph is saved as a pdf file in the output repository
+    """
+    label = "vaccinate" if is_vac else "unvaccinate"
+    filename = "vac" if is_vac else "unvac"
+    dic_data = group_by_date_age_vac(dic_data_grouped, field, is_vac=is_vac)
+    df = pd.DataFrame(dic_data).T
+
+    # scope the bigger font to this graph, `plt.rcParams` is process-wide
+    with plt.rc_context({"font.size": 24}):
+        ax = df.plot.bar(figsize=(26, 15))
+        ax.set_title(f"{field.value} {label} percent grouped by age")
+        ax.set_xlabel("date")
+        fig = ax.get_figure()
+
+        plt.xticks(rotation=45)
+        plt.legend(loc="upper right")
+        plt.tight_layout()
+
+        fig.savefig(
+            os.path.join(OUTPUT_REPOSITORY, f"{filename}_age_grouped_{field.value}.pdf")
+        )
+    # release the figure: this function runs twice per field
+    plt.close(fig)
 
 
@@ -260,6 +326,7 @@ if __name__ == "__main__":
     Plots availables :
         - cumulative deaths by age
         - indicators by vaccine status and age
+        - indicators vaccine/unvaccine percent grouped by age
     Main indicators are :
         - hospitalisations
         - criticals
@@ -287,9 +354,13 @@ if __name__ == "__main__":
     )
 
     plot_data_pool_args = build_data_pool_args()
-    f = partial(plot_data_by_field, dic_data_grouped)
+    f = partial(plot_data_by_age_vac, dic_data_grouped)
     with Pool() as pool:
         pool.starmap(f, plot_data_pool_args)
 
     for field in Field:
         plot_cumulative_field(dic_data_grouped, field)
+
+    for field in Field:
+        plot_bar_data_by_field(dic_data_grouped, field)
+        plot_bar_data_by_field(dic_data_grouped, field, is_vac=False)