add fields plot by vac status grouped by age

This commit is contained in:
rmanach 2021-12-27 15:02:09 +01:00
parent 9c657a7887
commit 2357303295

153
drees.py
View File

@ -22,6 +22,15 @@ DATA_URL = "https://data.drees.solidarites-sante.gouv.fr/api/records/1.0/search/
DATA_REPOSITORY = "data"
OUTPUT_REPOSITORY = "output"

# one fixed bar colour per age group, looked up with the age group integer value
# (a matplotlib cycler would also work, a plain mapping is enough for ages plots)
AGE_COLORS = dict(enumerate(("pink", "green", "blue", "red", "gray")))
class DreesEnum(bytes, Enum):
def __new__(cls, value, label):
@ -65,7 +74,6 @@ def get_data(
"""
collect covid data by age from DREES
"""
logging.info("fetching data...")
os.makedirs(DATA_REPOSITORY, exist_ok=True)
data_url = DATA_URL.format(extension=extension)
if data_url.endswith("/"):
@ -76,12 +84,14 @@ def get_data(
else file_path
)
if not os.path.isfile(file_path) or refresh:
logging.info("fetching data...")
r = requests.get(data_url)
if not r.content:
raise ValueError("no data provided froim the url : {}".format(data_url))
with open(file_path, "wb") as f:
f.write(r.content)
return json.loads(r.content)
logging.info(f"opening {file_path}...")
return json.load(open(file_path, "rb"))
@ -160,6 +170,49 @@ def get_np_data(dic_data_grouped: Dict[dt, Any]) -> Tuple[np.ndarray, np.ndarray
return np_data, np_date
def split_by_vac_status(np_data: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    split the data along the vaccine status axis (axis 2)

    returns a tuple of two arrays with the status axis removed :
    - the sum of every status from index 1 onwards
    - the slice located at VacStatus.NC
    """
    # presumably VacStatus.NC.value is 0, so the two parts do not overlap
    # TODO confirm against the VacStatus enum
    np_data_unvac = np_data[:, :, VacStatus.NC.value, :]
    np_data_vac = np_data[:, :, 1:, :].sum(axis=2)
    return np_data_vac, np_data_unvac
def get_vaccine_percent(np_data: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    get the vaccine ratio per date, age and field

    the vaccine data holds all the vaccine status except the unvaccinated one,
    both parts are divided by the sum over every vaccine status (axis 2)

    returns the tuple (vaccinated ratio, unvaccinated ratio), the values are
    fractions and are not multiplied by 100 here
    """
    np_data_vac, np_data_unvac = split_by_vac_status(np_data)
    # total over all the vaccine status, shared by both ratios
    np_total = np.sum(np_data, axis=2)
    return np_data_vac / np_total, np_data_unvac / np_total
def _percent_age_by_date(np_data_status: np.ndarray, field: Field) -> np.ndarray:
    """
    get, for each date, the percent of `field` carried by every age group

    np_data_status is indexed as [date, age, field]; the result is indexed as
    [date, age group value] and rounded to 2 decimals
    """
    age_indexes = [age_group.value for age_group in AgeGroup]
    np_field = np_data_status[:, :, field.value]
    # per date total over all the ages, NaN ignored; keepdims to broadcast by row
    np_sum_effectif = np.nansum(np_field, axis=1, keepdims=True)
    np_percent_age = np.empty((len(np_data_status), len(AgeGroup)))
    np_percent_age[:, age_indexes] = np.round(
        (np_field[:, age_indexes] / np_sum_effectif) * 100, 2
    )
    return np_percent_age


def get_percent_age_by_date_field(
    np_data: np.ndarray, field: Field
) -> Tuple[np.ndarray, np.ndarray]:
    """
    get numpy percent age grouped by date and field, split by vaccine status

    for each date, the value of `field` of every age group is divided by the
    sum over all the age groups of that date (NaN ignored) and expressed as a
    percent rounded to 2 decimals

    returns the tuple (vaccinated percents, unvaccinated percents), both
    indexed as [date, age group value]

    a date whose sum is 0 is not special-cased : the numpy division result
    (nan or inf) is kept as is
    """
    np_data_vac, np_data_unvac = split_by_vac_status(np_data)
    return (
        _percent_age_by_date(np_data_vac, field),
        _percent_age_by_date(np_data_unvac, field),
    )
def get_plot_fig(
grid: Optional[bool] = True,
date_format: Optional[str] = DATE_FORMAT,
@ -193,8 +246,68 @@ def save_and_close_fig(
logging.info(f"{output_path} plotted")
def split_by_vac_status(np_data: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    split the data along the vaccine status axis (axis 2)

    returns a tuple of two arrays with the status axis removed :
    - the sum of every status from index 1 onwards
    - the slice located at VacStatus.NC
    """
    # presumably VacStatus.NC.value is 0, so the two parts do not overlap
    # TODO confirm against the VacStatus enum
    np_data_unvac = np_data[:, :, VacStatus.NC.value, :]
    np_data_vac = np_data[:, :, 1:, :].sum(axis=2)
    return np_data_vac, np_data_unvac
def analyse(np_data: np.ndarray, np_date: np.ndarray) -> None:
    """
    print the mean vaccinated percent of every age group / field couple

    the mean is taken over the dates, NaN ignored, and rounded to 2 decimals
    np_date is accepted but not used by this function
    """
    logging.info("analysing data...")
    np_percent_vac = get_vaccine_percent(np_data)[0]
    logging.info("--- vaccine mean percent ---")
    for age_group in AgeGroup:
        # all the dates and fields of the current age group
        np_percent_age = np_percent_vac[:, age_group.value]
        for field in Field:
            mean_ratio = np.nanmean(np_percent_age[:, field.value])
            mean_vac_percent = np.round(mean_ratio * 100, 2)
            print(f"{field.label} - {age_group.label} - vac : {mean_vac_percent}%")
def plot_bar_age_percent_vac_status_by_field(
    np_data_vac_status: np.ndarray, field: Field, is_vac: Optional[bool] = True
) -> None:
    """
    plot the age group percents of a field as a stacked bar diagram

    np_data_vac_status is indexed as [date, age group value], one bar segment
    is stacked per age group for every date
    is_vac only selects the wording ("vac" / "no vac") of the title and of the
    output file name

    the figure is saved under OUTPUT_REPOSITORY and the input array is left
    untouched
    """
    fig, ax = get_plot_fig(figsize=(22, 8))
    title = "vac" if is_vac else "no vac"

    # NOTE(review): np_date is not a parameter, it is resolved as a module level
    # name which is only assigned in the `__main__` section, so this function
    # fails with a NameError when the module is imported instead of executed

    # running top of the stack, kept in a fresh array : accumulating in a view
    # of the first column would overwrite the caller's data in place
    bottom = np.zeros(np_data_vac_status.shape[0])
    for age_group in AgeGroup:
        percents_age = np_data_vac_status[:, age_group.value]
        ax.bar(
            np_date,
            percents_age,
            label=age_group.label,
            bottom=bottom,
            color=AGE_COLORS[age_group.value],
        )
        bottom = bottom + percents_age

    ax.set_ylabel("%")
    ax.set_title(f"{field.label} - {title}")
    plt.legend(
        [age_group.label for age_group in AgeGroup], loc="upper right", frameon=True
    )
    # the legend is already set above, hence has_legend=False
    save_and_close_fig(
        fig,
        os.path.join(OUTPUT_REPOSITORY, f"age_percent_{title}_{field.label}"),
        has_legend=False,
    )
def plot_bar_age_percent_by_field(np_data: np.ndarray, field: Field) -> None:
    """
    plot the age grouped percent bar diagrams of a field

    two diagrams are produced, the vaccinated one first then the unvaccinated one
    """
    np_percents = get_percent_age_by_date_field(np_data, field)
    # the tuple order is (vaccinated, unvaccinated)
    for np_percent_age, is_vac in zip(np_percents, (True, False)):
        plot_bar_age_percent_vac_status_by_field(np_percent_age, field, is_vac=is_vac)
def plot_cumulative_field(
@ -210,8 +323,8 @@ def plot_cumulative_field(
np_cumulate_unvac: np.ndarray = np.cumsum(
np_data_unvac[:, age_group.value, field.value], axis=0
)
plt.plot(np_date, np_cumulate_vac, label=f"{age_group.label} vax")
plt.plot(np_date, np_cumulate_unvac, label=f"{age_group.label} no vax")
plt.plot(np_date, np_cumulate_vac, label=f"{age_group.label} vac")
plt.plot(np_date, np_cumulate_unvac, label=f"{age_group.label} no vac")
plt.title(f"nombre de {field.label} cumulé par age")
plt.xlabel("date")
@ -244,17 +357,6 @@ def plot_fields_by_age_vac(
)
def get_vaccine_percent(np_data: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    get the vaccine ratio per date, age and field

    the vaccine data holds all the vaccine status except the unvaccinated one,
    both parts are divided by the sum over every vaccine status (axis 2)

    returns the tuple (vaccinated ratio, unvaccinated ratio), the values are
    fractions and are not multiplied by 100 here
    """
    np_data_vac, np_data_unvac = split_by_vac_status(np_data)
    # total over all the vaccine status, shared by both ratios
    np_total = np.sum(np_data, axis=2)
    return np_data_vac / np_total, np_data_unvac / np_total
def plot_bar_data_by_age_field(
np_data: np.ndarray,
np_date: np.ndarray,
@ -272,14 +374,16 @@ def plot_bar_data_by_age_field(
for idx_date in range(len(np_date)):
vac_percent = np.round(
np_percent_vac[idx_date, age_group.value, field.value] * 100
np_percent_vac[idx_date, age_group.value, field.value] * 100, 2
)
unvac_percent = np.round(
np_percent_unvac[idx_date, age_group.value, field.value] * 100
np_percent_unvac[idx_date, age_group.value, field.value] * 100, 2
)
bar_vac = ax.bar(idx_date, vac_percent, color="b", label="vac")
ax.bar(idx_date, unvac_percent, bottom=vac_percent, color="r", label="no vac")
ax.bar_label(
bar_vac, label_type="edge", color="black", fontsize="7", fmt="%.0f"
)
bar_vac = ax.bar(idx_date, vac_percent, color="b", label="vax")
ax.bar(idx_date, unvac_percent, bottom=vac_percent, color="r", label="no vax")
ax.bar_label(bar_vac, label_type="edge", color="black", fontsize="8")
ax.set_ylim(top=105) # to display 100% label
ax.set_ylabel("%")
@ -293,7 +397,7 @@ def plot_bar_data_by_age_field(
for idx, d in enumerate(np_date.astype(dt))
],
)
plt.legend(["vax", "no vax"], loc=0, frameon=True)
plt.legend(["vac", "no vac"], loc="upper right", frameon=True)
save_and_close_fig(
fig,
@ -331,10 +435,12 @@ if __name__ == "__main__":
- cumulative deaths by age
- hc, sc, dc by vaccine status and age
- hc, sc, dc (vaccine/unvaccine percent) by age
- hc, sc, dc (age grouped percent) by field
Main indicators are :
- hospitalisations (hc)
- criticals (sc)
- deaths (dc)
hc, sc, dc include positive PCR tests
"""
parser = argparse.ArgumentParser()
@ -357,6 +463,8 @@ if __name__ == "__main__":
np_data, np_date = get_np_data(dic_data_grouped)
analyse(np_data, np_date)
plot_fields_args = get_age_vac_args()
f_fields = partial(plot_fields_by_age_vac, np_data, np_date)
plot_vac_percent_age_args = get_age_field_args()
@ -367,3 +475,4 @@ if __name__ == "__main__":
for field in Field:
plot_cumulative_field(np_data, np_date, field)
plot_bar_age_percent_by_field(np_data, field)