From 2357303295981762088345cb2a5df54417420260 Mon Sep 17 00:00:00 2001
From: rmanach
Date: Mon, 27 Dec 2021 15:02:09 +0100
Subject: [PATCH] add fields plot by vac status grouped by age

---
Review notes (free-form text in this position is ignored by `git am`):
- Line breaks and indentation were restored from a whitespace-collapsed copy
  of this patch. Hunk line counts and the diffstat totals (131 insertions,
  22 deletions) were used to place blank lines; the indentation of context
  lines is inferred from Python syntax and should be checked against the
  pre-image (blob 98ffd97) before applying.
- plot_bar_age_percent_vac_status_by_field() reads `np_date`, which is not
  one of its parameters; it only resolves through the module-level name
  assigned under `if __name__ == "__main__":`.
- In the same function `bottom` is a slice of the input array and is updated
  with `+=`, so column 0 of the caller's array is modified in place.
- get_data() still returns `json.load(open(file_path, "rb"))`, which never
  closes the file handle explicitly.

 drees.py | 153 +++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 131 insertions(+), 22 deletions(-)

diff --git a/drees.py b/drees.py
index 98ffd97..6933209 100644
--- a/drees.py
+++ b/drees.py
@@ -22,6 +22,15 @@ DATA_URL = "https://data.drees.solidarites-sante.gouv.fr/api/records/1.0/search/
 DATA_REPOSITORY = "data"
 OUTPUT_REPOSITORY = "output"
 
+# cycler could be better, but for ages plots it's ok
+AGE_COLORS = {
+    0: "pink",
+    1: "green",
+    2: "blue",
+    3: "red",
+    4: "gray",
+}
+
 
 class DreesEnum(bytes, Enum):
     def __new__(cls, value, label):
@@ -65,7 +74,6 @@ def get_data(
     """
     collect covid data by age from DREES
     """
-    logging.info("fetching data...")
     os.makedirs(DATA_REPOSITORY, exist_ok=True)
     data_url = DATA_URL.format(extension=extension)
     if data_url.endswith("/"):
@@ -76,12 +84,14 @@ def get_data(
         else file_path
     )
     if not os.path.isfile(file_path) or refresh:
+        logging.info("fetching data...")
         r = requests.get(data_url)
         if not r.content:
             raise ValueError("no data provided froim the url : {}".format(data_url))
         with open(file_path, "wb") as f:
             f.write(r.content)
         return json.loads(r.content)
+    logging.info(f"opening {file_path}...")
     return json.load(open(file_path, "rb"))
 
 
@@ -160,6 +170,49 @@ def get_np_data(dic_data_grouped: Dict[dt, Any]) -> Tuple[np.ndarray, np.ndarray
     return np_data, np_date
 
 
+def split_by_vac_status(np_data: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+    return np.sum(np_data[:, :, 1:, :], axis=2), np_data[:, :, VacStatus.NC.value, :]
+
+
+def get_vaccine_percent(np_data: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    get the vaccine percent per date, age and field
+    the vaccine data holds all the vaccine status except unvaccine
+    """
+    np_data_vac, np_data_unvac = split_by_vac_status(np_data)
+    np_percent_vac = np_data_vac / np.sum(np_data, axis=2)
+    np_percent_unvac = np_data_unvac / np.sum(np_data, axis=2)
+    return np_percent_vac, np_percent_unvac
+
+
+def get_percent_age_by_date_field(
+    np_data: np.ndarray, field: Field
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    get numpy percent age grouped by date and field splited by vaccine status
+    """
+    np_percent_age_vac = np.empty((len(np_data), len(AgeGroup)))
+    np_percent_age_unvac = np.copy(np_percent_age_vac)
+    np_data_vac, np_data_unvac = split_by_vac_status(np_data)
+    for idx_date in range(len(np_data_vac)):
+        sum_effectif = np.nansum(np_data_vac[idx_date, :, field.value])
+        for age_group in AgeGroup:
+            np_percent_age_vac[idx_date, age_group.value] = np.round(
+                (np_data_vac[idx_date, age_group.value, field.value] / sum_effectif)
+                * 100,
+                2,
+            )
+    for idx_date in range(len(np_data_unvac)):
+        sum_effectif = np.nansum(np_data_unvac[idx_date, :, field.value])
+        for age_group in AgeGroup:
+            np_percent_age_unvac[idx_date, age_group.value] = np.round(
+                (np_data_unvac[idx_date, age_group.value, field.value] / sum_effectif)
+                * 100,
+                2,
+            )
+    return np_percent_age_vac, np_percent_age_unvac
+
+
 def get_plot_fig(
     grid: Optional[bool] = True,
     date_format: Optional[str] = DATE_FORMAT,
@@ -193,8 +246,68 @@ def save_and_close_fig(
     logging.info(f"{output_path} plotted")
 
 
-def split_by_vac_status(np_data: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
-    return np.sum(np_data[:, :, 1:, :], axis=2), np_data[:, :, VacStatus.NC.value, :]
+def analyse(np_data: np.ndarray, np_date: np.ndarray) -> None:
+    """
+    analyse data
+    """
+    logging.info("analysing data...")
+    np_percent_vac, _ = get_vaccine_percent(np_data)
+
+    logging.info("--- vaccine mean percent ---")
+    for age_group in AgeGroup:
+        for field in Field:
+            mean_vac_percent = np.round(
+                np.nanmean(np_percent_vac[:, age_group.value, field.value]) * 100, 2
+            )
+            print(f"{field.label} - {age_group.label} - vac : {mean_vac_percent}%")
+
+
+def plot_bar_age_percent_vac_status_by_field(
+    np_data_vac_status: np.ndarray, field: Field, is_vac: Optional[bool] = True
+) -> None:
+    fig, ax = get_plot_fig(figsize=(22, 8))
+    bottom = np_data_vac_status[:, 0]
+    title = "vac" if is_vac else "no vac"
+    for age_group in AgeGroup:
+        percents_age = np_data_vac_status[:, age_group.value]
+        if age_group.value > 0:
+            ax.bar(
+                np_date,
+                percents_age,
+                label=age_group.label,
+                bottom=bottom,
+                color=AGE_COLORS[age_group.value],
+            )
+            bottom += percents_age
+        else:
+            ax.bar(
+                np_date,
+                percents_age,
+                label=age_group.label,
+                color=AGE_COLORS[age_group.value],
+            )
+
+    ax.set_ylabel("%")
+    ax.set_title(f"{field.label} - {title}")
+    plt.legend(
+        [age_group.label for age_group in AgeGroup], loc="upper right", frameon=True
+    )
+    save_and_close_fig(
+        fig,
+        os.path.join(OUTPUT_REPOSITORY, f"age_percent_{title}_{field.label}"),
+        has_legend=False,
+    )
+
+
+def plot_bar_age_percent_by_field(np_data: np.ndarray, field: Field) -> None:
+    """
+    plot percent vaccinated field group by age bar diagram
+    """
+    np_percent_age_vac, np_percent_age_unvac = get_percent_age_by_date_field(
+        np_data, field
+    )
+    plot_bar_age_percent_vac_status_by_field(np_percent_age_vac, field)
+    plot_bar_age_percent_vac_status_by_field(np_percent_age_unvac, field, is_vac=False)
 
 
 def plot_cumulative_field(
@@ -210,8 +323,8 @@ def plot_cumulative_field(
         np_cumulate_unvac: np.ndarray = np.cumsum(
             np_data_unvac[:, age_group.value, field.value], axis=0
         )
-        plt.plot(np_date, np_cumulate_vac, label=f"{age_group.label} vax")
-        plt.plot(np_date, np_cumulate_unvac, label=f"{age_group.label} no vax")
+        plt.plot(np_date, np_cumulate_vac, label=f"{age_group.label} vac")
+        plt.plot(np_date, np_cumulate_unvac, label=f"{age_group.label} no vac")
 
     plt.title(f"nombre de {field.label} cumulé par age")
     plt.xlabel("date")
@@ -244,17 +357,6 @@ def plot_fields_by_age_vac(
     )
 
 
-def get_vaccine_percent(np_data: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
-    """
-    get the vaccine percent per date, age and field
-    the vaccine data holds all the vaccine status except unvaccine
-    """
-    np_data_vac, np_data_unvac = split_by_vac_status(np_data)
-    np_percent_vac = np_data_vac / np.sum(np_data, axis=2)
-    np_percent_unvac = np_data_unvac / np.sum(np_data, axis=2)
-    return np_percent_vac, np_percent_unvac
-
-
 def plot_bar_data_by_age_field(
     np_data: np.ndarray,
     np_date: np.ndarray,
@@ -272,14 +374,16 @@ def plot_bar_data_by_age_field(
 
     for idx_date in range(len(np_date)):
         vac_percent = np.round(
-            np_percent_vac[idx_date, age_group.value, field.value] * 100
+            np_percent_vac[idx_date, age_group.value, field.value] * 100, 2
         )
         unvac_percent = np.round(
-            np_percent_unvac[idx_date, age_group.value, field.value] * 100
+            np_percent_unvac[idx_date, age_group.value, field.value] * 100, 2
+        )
+        bar_vac = ax.bar(idx_date, vac_percent, color="b", label="vac")
+        ax.bar(idx_date, unvac_percent, bottom=vac_percent, color="r", label="no vac")
+        ax.bar_label(
+            bar_vac, label_type="edge", color="black", fontsize="7", fmt="%.0f"
         )
-        bar_vac = ax.bar(idx_date, vac_percent, color="b", label="vax")
-        ax.bar(idx_date, unvac_percent, bottom=vac_percent, color="r", label="no vax")
-        ax.bar_label(bar_vac, label_type="edge", color="black", fontsize="8")
 
     ax.set_ylim(top=105)  # to display 100% label
     ax.set_ylabel("%")
@@ -293,7 +397,7 @@ def plot_bar_data_by_age_field(
             for idx, d in enumerate(np_date.astype(dt))
         ],
     )
-    plt.legend(["vax", "no vax"], loc=0, frameon=True)
+    plt.legend(["vac", "no vac"], loc="upper right", frameon=True)
 
     save_and_close_fig(
         fig,
@@ -331,10 +435,12 @@ if __name__ == "__main__":
     - cumulative deaths by age
     - hc, sc, dc by vaccine status and age
     - hc, sc, dc (vaccine/unvaccine percent) by age
+    - hc, sc, dc (age grouped percent) by field
     Main indicators are :
     - hospitalisations (hc)
     - criticals (sc)
     - deaths (dc)
+    hc, sc, dc include positive PCR tests
     """
 
     parser = argparse.ArgumentParser()
@@ -357,6 +463,8 @@ if __name__ == "__main__":
 
     np_data, np_date = get_np_data(dic_data_grouped)
 
+    analyse(np_data, np_date)
+
     plot_fields_args = get_age_vac_args()
     f_fields = partial(plot_fields_by_age_vac, np_data, np_date)
     plot_vac_percent_age_args = get_age_field_args()
@@ -367,3 +475,4 @@ if __name__ == "__main__":
 
     for field in Field:
         plot_cumulative_field(np_data, np_date, field)
+        plot_bar_age_percent_by_field(np_data, field)