704 lines
23 KiB
Python
704 lines
23 KiB
Python
import argparse
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
from collections import namedtuple
|
|
from datetime import datetime as dt
|
|
from enum import Enum
|
|
from functools import partial
|
|
from multiprocessing import Pool
|
|
from typing import Any, Dict, List, Optional, OrderedDict, Tuple, Union
|
|
|
|
import numpy as np
|
|
import requests
|
|
from jinja2 import Environment, FileSystemLoader, select_autoescape
|
|
from matplotlib import dates as md
|
|
from matplotlib import pyplot as plt
|
|
|
|
# layout of every log line emitted by this script
FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(format=FORMAT, level=logging.INFO)

# strptime/strftime pattern used for the dataset `date` field and the html period
DATE_FORMAT = "%Y-%m-%d"

# DREES open-data endpoint (rows=-1 asks for every record)
DATA_URL = "https://data.drees.solidarites-sante.gouv.fr/api/records/1.0/search/?dataset=covid-19-resultats-par-age-issus-des-appariements-entre-si-vic-si-dep-et-vac-si&q=&rows=-1&facet=date&facet=vac_statut&facet=age"
# directory holding the cached dataset
DATA_REPOSITORY = "data"

STATIC_REPOSITORY = "static"
# directory receiving the generated plots
OUTPUT_REPOSITORY = os.path.join(STATIC_REPOSITORY, "plots")
# directory receiving the generated html page
BUILD_REPOSITORY = "build"

# plots are first written as `<name>.tmp.png`, then renamed to `<name>.png`
TMP_SUFFIX = ".tmp"
FORMAT_SUFFIX = ".png"
OUTPUT_SUFFIX = f"{TMP_SUFFIX}{FORMAT_SUFFIX}"

# re.escape instead of a hand-written "\." (an invalid escape sequence in a
# non-raw string literal); the compiled pattern is unchanged
TMP_FILE_REGEX = re.compile(r"^.*{}$".format(re.escape(OUTPUT_SUFFIX)))

MAIN_URL = "https://covid.thegux.fr/"
# MAIN_URL = "/home/romain/code/covid-plotter/" # to debug (adjust with your local path)
|
|
|
|
# bar colour of each age group, keyed by the age group index
# (a cycler could be better, but a fixed palette is enough for the age plots)
AGE_COLORS = dict(enumerate(("pink", "green", "blue", "red", "gray")))
|
|
|
|
|
|
class DreesEnum(bytes, Enum):
    """
    base enum whose members are declared as `(value, label)` pairs

    `value` becomes the member value, `label` is stored as an extra attribute
    """

    def __new__(cls, value, label):
        # the bytes payload is the single byte `value`
        member = bytes.__new__(cls, [value])
        member._value_ = value
        member.label = label
        return member
|
|
|
|
|
|
class Field(DreesEnum):
    """
    indicators read from each dataset record

    the lower-cased member name is the record key, the value is the index
    on the last axis of the numpy data and the label is used in plot titles
    """

    HC = (0, "Hospitalisations")
    SC = (1, "Soins critiques")
    # NOTE(review): "Décés" looks like a misspelling of "Décès"; the label is
    # displayed to users, confirm nothing depends on the exact text before fixing
    DC = (2, "Décés")
|
|
|
|
|
|
class Quota(DreesEnum):
    """
    population size columns, indexed right after the `Field` members
    on the last axis of the numpy data
    """

    EFFECTIF = (len(Field), "Effectif")
|
|
|
|
|
|
class VacStatus(DreesEnum):
    """
    vaccine statuses; the value is the index on the vaccine status axis of the
    numpy data and the label is compared with the dataset `vac_statut` text

    WARN: be careful, the statuses published in the dataset can change after a
    refresh; a label missing here makes `get_enum_vac_status` raise
    """

    # index 0 is the only unvaccinated status, every other member counts as vaccinated
    NC = (0, "Non-vaccinés")
    PDR = (1, "Primo dose récente")
    PDE = (2, "Primo dose efficace")
    CM3MSR = (3, "Complet de moins de 3 mois - sans rappel")
    CM3MAR = (4, "Complet - avec 1 rappel de moins de 3 mois")
    CM36MSR = (5, "Complet entre 3 mois et 6 mois - sans rappel")
    CM36MAR = (6, "Complet - avec 1 rappel entre 3 mois et 6 mois")
    C6MAR = (7, "Complet - avec 1 rappel de 6 mois ou plus")
    C6MSR = (8, "Complet de 6 mois ou plus - sans rappel")
    CM3MAR2 = (9, "Complet - avec 2 rappel de moins de 3 mois")
    CM36MAR2 = (10, "Complet - avec 2 rappel entre 3 mois et 6 mois")
    C6MAR2 = (11, "Complet - avec 2 rappel de 6 mois ou plus")
|
|
|
|
|
|
class AgeGroup(DreesEnum):
    """
    age groups; the value is the index on the age axis of the numpy data
    (and the `AGE_COLORS` key), the label is compared with the dataset `age` text
    """

    VERY_YOUNG = (0, "[0,19]")
    YOUNG = (1, "[20,39]")
    MID_OLD = (2, "[40,59]")
    OLD = (3, "[60,79]")
    # the ';' is intentional here: the label has to equal the dataset text
    # (presumably published this way by DREES, verify after a dataset refresh)
    VERY_OLD = (4, "[80;+]")
|
|
|
|
|
|
# records produced by `analyse`: one averaged percent for an (age, field) pair
# two distinct types so that consumers can tell the two statistics apart
VaccineMean = namedtuple("VaccineMean", "age field percent")
AgeMean = namedtuple("AgeMean", "age field percent")
|
|
|
|
|
|
def get_data(
    file_path: Optional[str] = None,
    extension: Optional[str] = "json",
    refresh=False,
) -> Dict[str, Any]:
    """
    collect covid data by age from DREES
    src: DATA_URL

    the payload is cached in `file_path`; it is downloaded only when the cache
    is missing or when `refresh` is set

    file_path: cache location (default: derived from the url, inside DATA_REPOSITORY)
    extension: passed to `DATA_URL.format`; DATA_URL currently has no
               `{extension}` placeholder so the value has no effect
    refresh: force a new download

    raises requests.HTTPError on an http error status, ValueError on an empty
    body and json.JSONDecodeError when the body is not valid json
    """
    os.makedirs(DATA_REPOSITORY, exist_ok=True)
    data_url = DATA_URL.format(extension=extension)
    if data_url.endswith("/"):
        data_url = data_url[:-1]
    if file_path is None:
        file_path = os.path.join(DATA_REPOSITORY, data_url.split("/")[-1])

    if refresh or not os.path.isfile(file_path):
        logging.info("fetching data...")
        # (connect, read) timeouts so a stalled server cannot hang the script
        r = requests.get(data_url, timeout=(10, 120))
        # an error page must never end up in the cache
        r.raise_for_status()
        if not r.content:
            raise ValueError("no data provided froim the url : {}".format(data_url))
        # parse before writing: an invalid payload is rejected without
        # overwriting a previously valid cache file
        data = json.loads(r.content)
        with open(file_path, "wb") as f:
            f.write(r.content)
        return data

    logging.info(f"opening {file_path}...")
    # context manager: the original left the file handle open
    with open(file_path, "rb") as f:
        return json.load(f)
|
|
|
|
|
|
def get_enum_vac_status(value):
    """
    return the value of the first `VacStatus` member whose label equals `value`

    raises Exception when no member carries this label
    """
    found = next((status for status in VacStatus if status.label == value), None)
    if found is None:
        raise Exception(f"vac status : {value} does not exit in enum 'VacStatus'")
    return found.value
|
|
|
|
|
|
def get_enum_age(value):
    """
    return the value of the first `AgeGroup` member whose label equals `value`

    raises Exception when no member carries this label
    """
    found = next((group for group in AgeGroup if group.label == value), None)
    if found is None:
        raise Exception(f"age : {value} does not exit in enum 'AgeGroup'")
    return found.value
|
|
|
|
|
|
def get_enum_field(value):
    """
    return the value of the `Field` member (looked up first) or `Quota` member
    whose lower-cased name equals `value`

    raises Exception when neither enum has such a member
    """
    found = next(
        (member for member in (*Field, *Quota) if member.name.lower() == value),
        None,
    )
    if found is None:
        raise Exception(f"field : {value} does not exit in enum 'Field'")
    return found.value
|
|
|
|
|
|
def structure_data(data: Dict[str, Any]) -> Dict[dt, Any]:
    """
    reshape the raw records into nested ordered dictionaries
    sorted by ascending date
        date: {
            age: {
                vac_status: {
                    'hc',
                    'sc',
                    'dc',
                    ...
                }
            }
        }
    """
    logging.info("restructuring the data...")
    unsorted_data: Dict[dt, Any] = OrderedDict()
    # record keys to copy: every Field name, then every Quota name
    value_names = [member.name.lower() for member in (*Field, *Quota)]
    for record in data["records"]:
        row_fields = record["fields"]
        date = dt.strptime(row_fields["date"], DATE_FORMAT)
        # setdefault creates each missing nesting level on the fly
        by_age = unsorted_data.setdefault(date, OrderedDict())
        by_status = by_age.setdefault(row_fields["age"], OrderedDict())
        values = by_status.setdefault(row_fields["vac_statut"], OrderedDict())
        for value_name in value_names:
            values[value_name] = row_fields[value_name]
    # date keys in ascending order
    dic_data = OrderedDict(sorted(unsorted_data.items(), key=lambda item: item[0]))
    logging.info("data restructured")
    return dic_data
|
|
|
|
|
|
def get_np_data(dic_data: Dict[dt, Any]) -> Tuple[np.ndarray, np.ndarray]:
    """
    store the data in numpy data structure

    returns (np_data, np_date):
    - np_data is indexed as (date, age group, vaccine status, field or quota)
    - np_date holds the dates ("datetime64[s]") in the iteration order of `dic_data`

    raises Exception (from the get_enum_* helpers) on an unknown age,
    vaccine status or field name
    """
    logging.info("storing data in numpy data structure...")
    # zeros, not empty: a (date, age, status) combination absent from the
    # dataset must read as "nothing recorded" instead of uninitialised memory
    np_data = np.zeros(
        (len(dic_data), len(AgeGroup), len(VacStatus), len(Field) + len(Quota))
    )
    np_date = np.empty(len(dic_data), dtype="datetime64[s]")
    for idx_date, (date, dic_age) in enumerate(dic_data.items()):
        np_date[idx_date] = date
        for age, dic_vac in dic_age.items():
            idx_age = get_enum_age(age)
            for vac, dic_field in dic_vac.items():
                idx_vac = get_enum_vac_status(vac)
                for field, value in dic_field.items():
                    idx_field = get_enum_field(field)
                    np_data[idx_date, idx_age, idx_vac, idx_field] = value
    logging.info("date and data generated")
    # nothing to report (and nothing to index) when the dataset is empty
    if len(np_date):
        date_start = np_date[0]
        date_end = np_date[-1]
        logging.info(f"range period : {date_start} - {date_end}")
    # an effectif < 1 means nothing (0.04 person...): every value of such a
    # (date, age, status) cell is reset to 0, the effectif included
    quota_mask = np_data[:, :, :, Quota.EFFECTIF.value] < 1
    np_data[quota_mask] = 0
    return np_data, np_date
|
|
|
|
|
|
def split_by_vac_status(np_data: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    separate the vaccinated from the unvaccinated population

    returns (vaccinated, unvaccinated): the sum over every vaccine status index
    from 1 upward, and the slice of the `VacStatus.NC` index; the vaccine status
    axis (third one) is removed from both results
    """
    np_unvaccinated = np_data[:, :, VacStatus.NC.value, :]
    np_vaccinated = np.sum(np_data[:, :, 1:, :], axis=2)
    return np_vaccinated, np_unvaccinated
|
|
|
|
|
|
def get_vaccine_status_distribution(
    np_data: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    compute the vaccinated and unvaccinated shares (ratio, not percent)
    for every date, age group and field

    the vaccinated share covers all the vaccine statuses except the unvaccinated one
    a zero total yields nan/inf (numpy division semantics)
    """
    # total over the vaccine status axis, the common denominator of both shares
    np_total = np.sum(np_data, axis=2)
    np_data_vac, np_data_unvac = split_by_vac_status(np_data)
    return np_data_vac / np_total, np_data_unvac / np_total
|
|
|
|
|
|
def get_distribution_age_by_field_and_vac_status(
    np_data: np.ndarray, field: Field
) -> Tuple[np.ndarray, np.ndarray]:
    """
    compute, date by date, the share (percent, 2 decimals) of each age group
    in `field`, separately for the vaccinated and the unvaccinated populations

    returns (vaccinated percents, unvaccinated percents), both indexed as (date, age group)
    """

    def age_percent(np_group: np.ndarray) -> np.ndarray:
        # np_group is indexed as (date, age group, field)
        np_percent = np.empty((len(np_data), len(AgeGroup)))
        for idx_date, np_day in enumerate(np_group):
            np_field = np_day[:, field.value]
            # nan entries are ignored in the daily total
            sum_effectif = np.nansum(np_field)
            for age_group in AgeGroup:
                np_percent[idx_date, age_group.value] = np.round(
                    np_field[age_group.value] / sum_effectif * 100, 2
                )
        return np_percent

    np_data_vac, np_data_unvac = split_by_vac_status(np_data)
    return age_percent(np_data_vac), age_percent(np_data_unvac)
|
|
|
|
|
|
def get_distribution_age_by_field(np_data: np.ndarray, field: Field) -> np.ndarray:
    """
    compute, date by date, the share (percent, 2 decimals) of each age group
    in `field`, all vaccine statuses together

    returns an array indexed as (date, age group)
    """
    np_percent_age = np.empty((len(np_data), len(AgeGroup)))
    for idx_date, np_day in enumerate(np_data):
        # (age group, vaccine status) values of the requested field for this date
        np_field = np_day[:, :, field.value]
        sum_effectif = np.nansum(np.nansum(np_field, axis=1))
        for age_group in AgeGroup:
            age_share = np.sum(np_field[age_group.value], axis=0) / sum_effectif
            np_percent_age[idx_date, age_group.value] = np.round(age_share * 100, 2)
    return np_percent_age
|
|
|
|
|
|
def get_plot_fig(
    grid: Optional[bool] = True,
    date_format: Optional[str] = DATE_FORMAT,
    figsize: Optional[Tuple[int, int]] = None,
    locator: Optional[Any] = None,
    auto_date_fmt: Optional[bool] = True,
) -> "Tuple[plt.Figure, plt.Axes]":
    """
    return pyplot fig, ax to plot data over range period with date formatting

    grid: passed to `ax.grid`
    date_format: format of the x axis major tick labels
    figsize: passed to `plt.subplots`
    locator: x axis major locator, a new `md.MonthLocator()` when None
    auto_date_fmt: call `fig.autofmt_xdate()` when true
    """
    if locator is None:
        # one locator per figure: a default built in the signature is created
        # once and would be attached to the axis of every figure
        locator = md.MonthLocator()
    fig, ax = plt.subplots(figsize=figsize)
    ax.grid(grid)
    date_formatter = md.DateFormatter(date_format)
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(date_formatter)
    if auto_date_fmt:
        fig.autofmt_xdate()
    return fig, ax
|
|
|
|
|
|
def save_and_close_fig(
    fig: "plt.Figure",
    output_path: str,
    has_legend: Optional[bool] = True,
    is_tight: Optional[bool] = True,
):
    """
    save `fig` as `<output_path><OUTPUT_SUFFIX>` then close it

    fig: figure to save
    output_path: destination path without suffix
    has_legend: draw a legend on the current axes of `fig` before saving
    is_tight: apply a tight layout before saving
    """
    logging.info(f"plotting : {output_path}...")
    # act on `fig` itself rather than on pyplot's implicit current figure, so
    # that the figure being saved is always the one received
    if has_legend:
        fig.gca().legend()
    if is_tight:
        fig.tight_layout()
    fig.savefig(f"{output_path}{OUTPUT_SUFFIX}")
    plt.close(fig)
    logging.info(f"{output_path} plotted")
|
|
|
|
|
|
def analyse(np_data: np.ndarray) -> List[Union[VaccineMean, AgeMean]]:
    """
    analyse DREES dataset
    every statistic is printed and the `VaccineMean` / `AgeMean` records are returned
    (the age means split by vaccine status are printed only)
    useful stats can be computed here if no plots are needed
    """

    def mean_percent(np_percent: np.ndarray) -> float:
        # nan-aware mean over the whole period, 2 decimals
        return np.round(np.nanmean(np_percent), 2)

    logging.info("analysing data...")
    lst_analyse_data: List[Union[VaccineMean, AgeMean]] = []
    np_vac_distri, _ = get_vaccine_status_distribution(np_data)

    logging.info(
        "--- field distribution by age and only vaccine status (averaged over the whole period) ---"
    )
    for age_group in AgeGroup:
        for field in Field:
            np_vac_ratio = np_vac_distri[:, age_group.value, field.value]
            # the distribution is a ratio: scale to percent before rounding
            vac_percent_mean = np.round(np.nanmean(np_vac_ratio) * 100, 2)
            print(f"{field.name} - {age_group.label} - vac : {vac_percent_mean}%")
            lst_analyse_data.append(
                VaccineMean(age_group.label, field.label, vac_percent_mean)
            )

    logging.info(
        "--- age distribution by field and vac status (averaged over the whole period) ---"
    )
    for field in Field:
        np_age_percent = get_distribution_age_by_field(np_data, field)
        np_percent_age_vac, np_percent_age_unvac = (
            get_distribution_age_by_field_and_vac_status(np_data, field)
        )
        for age_group in AgeGroup:
            idx_age = age_group.value

            percent_age_mean = mean_percent(np_age_percent[:, idx_age])
            print(f"age: {age_group.label} - field: {field.name} = {percent_age_mean}%")
            lst_analyse_data.append(
                AgeMean(age_group.label, field.label, percent_age_mean)
            )

            percent_age_vac_mean = mean_percent(np_percent_age_vac[:, idx_age])
            print(
                f"age: {age_group.label} - status: vac - field: {field.name} = {percent_age_vac_mean}%"
            )

            percent_age_unvac_mean = mean_percent(np_percent_age_unvac[:, idx_age])
            print(
                f"age: {age_group.label} - status: unvac - field: {field.name} = {percent_age_unvac_mean}%"
            )
    return lst_analyse_data
|
|
|
|
|
|
def plot_bar_age_distribution_by_field_and_vac_status(
    np_data: np.ndarray,
    np_date: np.ndarray,
    field: Field,
    is_vac: Optional[bool] = True,
) -> None:
    """
    plot the age distribution (percent) by field and vaccine status
    as stacked bars, one bar per date

    np_data: percents indexed as (date, age group); left unmodified
    np_date: dates used as x values
    field: indicator being plotted (title and file name)
    is_vac: whether `np_data` describes the vaccinated population
            (title and file name only)
    """
    fig, ax = get_plot_fig(figsize=(22, 8), locator=md.WeekdayLocator())
    suffix = "vac" if is_vac else "unvac"
    title = "Vaccinés" if is_vac else "Non vaccinés"
    # running top of the stack; a fresh array because accumulating in the
    # `np_data[:, 0]` view would overwrite the first column of the caller's data
    bottom = np.zeros(len(np_data))
    for age_group in AgeGroup:
        percents_age = np_data[:, age_group.value]
        ax.bar(
            np_date,
            percents_age,
            label=age_group.label,
            bottom=bottom,
            color=AGE_COLORS[age_group.value],
        )
        bottom = bottom + percents_age

    ax.set_ylabel("%")
    ax.set_title(f"{field.label} - {title}")
    ax.legend(
        [age_group.label for age_group in AgeGroup], loc="upper right", frameon=True
    )
    # the legend is already drawn above, hence has_legend=False
    save_and_close_fig(
        fig,
        os.path.join(OUTPUT_REPOSITORY, f"age_percent_{suffix}_{field.name.lower()}"),
        has_legend=False,
    )
|
|
|
|
|
|
def plot_bar_age_distribution_by_field(
    np_data: np.ndarray, np_date: np.ndarray, field: Field
) -> None:
    """
    plot the age distribution (percent) of `field`:
    one figure for the vaccinated, then one for the unvaccinated
    """
    np_age_vac_percent, np_age_unvac_percent = (
        get_distribution_age_by_field_and_vac_status(np_data, field)
    )
    for np_age_percent, is_vac in (
        (np_age_vac_percent, True),
        (np_age_unvac_percent, False),
    ):
        plot_bar_age_distribution_by_field_and_vac_status(
            np_age_percent, np_date, field, is_vac=is_vac
        )
|
|
|
|
|
|
def plot_cumulative_field(
    np_data: np.ndarray, np_date: np.ndarray, field: Field
) -> None:
    """
    plot, one figure per age group, the cumulative sum of `field` over time
    for the vaccinated and the unvaccinated (values are summed as stored,
    no per-million scaling is applied here)
    """
    np_data_vac, np_data_unvac = split_by_vac_status(np_data)
    # (data, legend label) of each curve, in drawing order
    curves = ((np_data_vac, "Vaccinés"), (np_data_unvac, "Non vaccinés"))
    for age_group in AgeGroup:
        fig, _ = get_plot_fig(auto_date_fmt=False)
        for np_group, label in curves:
            np_cumulate: np.ndarray = np.cumsum(
                np_group[:, age_group.value, field.value], axis=0
            )
            plt.plot(np_date, np_cumulate, label=label)

        plt.title(f"{age_group.label} - {field.label}")
        plt.xlabel("Date")
        plt.ylabel("Nombre de cas")
        plt.xticks(rotation=30)
        output_name = f"cumulative_{age_group.name.lower()}_{field.name.lower()}"
        save_and_close_fig(fig, os.path.join(OUTPUT_REPOSITORY, output_name))
|
|
|
|
|
|
def plot_fields_by_age_vac(
    np_data: np.ndarray, np_date: np.ndarray, age_group: AgeGroup, vac_status: VacStatus
) -> None:
    """
    plot every field for one age group and one vaccine status
    (cases per million people)

    np_data: data indexed as (date, age group, vaccine status, field or quota)
    np_date: dates used as x values
    a zero effectif yields nan/inf (numpy division semantics)
    """
    fig, _ = get_plot_fig(auto_date_fmt=False)

    np_effectif = np_data[:, age_group.value, vac_status.value, Quota.EFFECTIF.value]
    for field in Field:
        # 1e6 = one million, as announced by the y label
        # (the former 10e6 factor is ten million and overstated rates tenfold)
        np_result = (
            1e6
            * np_data[:, age_group.value, vac_status.value, field.value]
            / np_effectif
        )
        plt.plot(
            np_date, np_result, label=f"{field.label}", linestyle="dotted", linewidth=2
        )
    plt.xlabel("Date")
    plt.ylabel("Cas par million de personnes")
    plt.xticks(rotation=30)
    plt.title(f"{age_group.label} - {vac_status.label}")

    save_and_close_fig(
        fig,
        os.path.join(
            OUTPUT_REPOSITORY, f"all_{age_group.name.lower()}_{vac_status.name.lower()}"
        ),
    )
|
|
|
|
|
|
def plot_bar_vaccine_status_distribution_by_age_field(
    np_data: np.ndarray,
    np_date: np.ndarray,
    age_group: AgeGroup,
    field: Field,
) -> None:
    """
    plot, for one age group and one field, the vaccinated / unvaccinated split
    (percent) as one stacked bar per date
    """
    np_vac_distri, np_unvac_distri = get_vaccine_status_distribution(np_data)
    # wide figure so that bars and labels stay readable
    fig, ax = get_plot_fig(figsize=(22, 8), locator=md.WeekdayLocator())

    # ratios -> percents (2 decimals) for the requested age group and field
    np_vac_percent = np.round(
        np_vac_distri[:, age_group.value, field.value] * 100, 2
    )
    np_unvac_percent = np.round(
        np_unvac_distri[:, age_group.value, field.value] * 100, 2
    )
    for date, vac_percent, unvac_percent in zip(
        np_date, np_vac_percent, np_unvac_percent
    ):
        bar_vac = ax.bar(date, vac_percent, color="b", label="Vaccinés")
        ax.bar(date, unvac_percent, bottom=vac_percent, color="r", label="Non vaccinés")
        # no label on a bar made of a single colour
        if vac_percent in (0, 100):
            continue
        ax.bar_label(
            bar_vac, label_type="edge", color="black", fontsize="6.5", fmt="%.0f"
        )

    ax.set_ylabel("%")
    ax.set_title(f"{age_group.label} - {field.label}")

    plt.legend(["Vaccinés", "Non vaccinés"], loc="upper right", frameon=True)

    output_name = f"vac_percent_{age_group.name.lower()}_{field.name.lower()}"
    save_and_close_fig(
        fig,
        os.path.join(OUTPUT_REPOSITORY, output_name),
        has_legend=False,
    )
|
|
|
|
|
|
def check_timestep(np_date: np.ndarray) -> None:
    """
    ensure that every pair of consecutive dates is separated by the same timestep

    fewer than two dates leave nothing to compare: the check passes
    raises AssertionError when at least one timestep differs from the first one
    """
    # difference between each element (timedelta64 array)
    np_diff = np.diff(np_date)
    if np_diff.size == 0:
        return
    # explicit raise instead of an `assert` statement: the check also runs
    # when python is started with -O
    if not np.all(np_diff == np_diff[0]):
        raise AssertionError("some timesteps missing !")
|
|
|
|
|
|
def get_age_vac_args() -> List[Tuple[AgeGroup, VacStatus]]:
    """
    build the pool arguments: every (age group, vaccine status) pair,
    age groups varying slowest
    """
    return [
        (age_group, vac_status) for age_group in AgeGroup for vac_status in VacStatus
    ]
|
|
|
|
|
|
def get_age_field_args() -> List[Tuple[AgeGroup, Field]]:
    """
    build the pool arguments: every (age group, field) pair,
    age groups varying slowest
    """
    return [(age_group, field) for age_group in AgeGroup for field in Field]
|
|
|
|
|
|
def get_field_args() -> List[Tuple[Field]]:
    """
    build the pool arguments: one 1-tuple per field
    (tuples because the pool unpacks each item as positional arguments)
    """
    return [(field,) for field in Field]
|
|
|
|
|
|
def move_tmp_plots() -> None:
    """
    move .tmp.png plots into .png after generation

    only the files of OUTPUT_REPOSITORY whose name matches TMP_FILE_REGEX are
    renamed; an existing destination file is overwritten
    """
    logging.info(f"moving '{OUTPUT_SUFFIX}' file in {FORMAT_SUFFIX}...")
    for filename in os.listdir(OUTPUT_REPOSITORY):
        if not TMP_FILE_REGEX.match(filename):
            continue
        file_path = os.path.join(OUTPUT_REPOSITORY, filename)
        # swap the trailing suffix only: str.replace would also rewrite any
        # other occurrence of the suffix inside the path
        final_path = file_path[: -len(OUTPUT_SUFFIX)] + FORMAT_SUFFIX
        # os.replace overwrites the plot of a previous run on every platform
        # (os.rename fails on Windows when the destination exists)
        os.replace(file_path, final_path)
    logging.info("files moved")
|
|
|
|
|
|
def generate_html_page(
    np_date: np.ndarray, lst_analyse_data: List[Union[VaccineMean, AgeMean]]
) -> None:
    """
    render `templates/index.template.html` into `<BUILD_REPOSITORY>/index.html`

    np_date: dates of the dataset, the first and last ones give the displayed period
    lst_analyse_data: records returned by `analyse`, handed to the template
                      split by record type
    """
    logging.info("generating html page with plots...")
    os.makedirs(BUILD_REPOSITORY, exist_ok=True)
    env = Environment(
        loader=FileSystemLoader("templates"), autoescape=select_autoescape()
    )
    template = env.get_template("index.template.html")
    date_start = np_date[0].astype(dt).strftime(DATE_FORMAT)
    date_end = np_date[-1].astype(dt).strftime(DATE_FORMAT)

    date_build = dt.now().strftime("%Y%m%d")
    # picture named after the build date, referenced only when the file
    # exists in the plots directory
    owid_path = f"fra-{date_build}.png"
    if not os.path.isfile(os.path.join(OUTPUT_REPOSITORY, owid_path)):
        owid_path = ""
    data = template.render(
        fields=Field,
        ages=AgeGroup,
        status=VacStatus,
        static=os.path.join(MAIN_URL, STATIC_REPOSITORY),
        src=DATA_URL,
        period=f"{date_start} - {date_end}",
        vaccine_mean=[x for x in lst_analyse_data if isinstance(x, VaccineMean)],
        age_mean=[x for x in lst_analyse_data if isinstance(x, AgeMean)],
        owid_path=owid_path,
    )
    # explicit encoding: the labels contain accented characters and the
    # platform default encoding is not utf-8 everywhere
    with open(
        os.path.join(BUILD_REPOSITORY, "index.html"), "w", encoding="utf-8"
    ) as f:
        f.write(data)
    logging.info("html page build")
|
|
|
|
|
|
if __name__ == "__main__":
    # This script analyses and plots DREES data
    #
    # Available stats:
    # - age distribution (percent) by field (vaccinated and unvaccinated)
    # - vaccinated/unvaccinated distribution (percent) by field and age
    #
    # Available plots:
    # - cumulative hc, sc, dc by age and vaccine status
    # - hc, sc, dc by vaccine status and age (cases per million)
    # - hc, sc, dc (vaccinated/unvaccinated percent distribution) by age
    # - hc, sc, dc (age percent distribution) by field
    #
    # Main indicators:
    # - hospitalisations (hc)
    # - criticals (sc)
    # - deaths (dc)
    # hc, sc, dc include positive PCR tests

    parser = argparse.ArgumentParser()
    # every option is a boolean switch, off by default
    for short_flag, long_flag, help_text in (
        ("-r", "--refresh", "redownload data for updates"),
        ("-np", "--no-plot", "no plot data"),
        ("-th", "--to-html", "create an html with the plots"),
    ):
        parser.add_argument(
            short_flag,
            long_flag,
            action="store_true",
            default=False,
            help=help_text,
        )

    args = parser.parse_args()

    dic_data_unstructured: Dict[str, Any] = get_data(
        file_path=os.path.join(DATA_REPOSITORY, "drees.json"), refresh=args.refresh
    )
    dic_data: Dict[dt, Any] = structure_data(dic_data_unstructured)
    np_data, np_date = get_np_data(dic_data)

    lst_analyse_data = analyse(np_data)
    check_timestep(np_date)

    if not args.no_plot:
        os.makedirs(OUTPUT_REPOSITORY, exist_ok=True)

        # (plot function bound to the data, argument tuples), in execution order
        plot_jobs = (
            (partial(plot_fields_by_age_vac, np_data, np_date), get_age_vac_args()),
            (
                partial(
                    plot_bar_vaccine_status_distribution_by_age_field, np_data, np_date
                ),
                get_age_field_args(),
            ),
            (
                partial(plot_bar_age_distribution_by_field, np_data, np_date),
                get_field_args(),
            ),
            (partial(plot_cumulative_field, np_data, np_date), get_field_args()),
        )

        # each family of plots is spread over 2 worker processes
        with Pool(2) as pool:
            for plot_func, plot_args in plot_jobs:
                pool.starmap(plot_func, plot_args)

        move_tmp_plots()

    if args.to_html:
        generate_html_page(np_date, lst_analyse_data)
|