field by age and vaccine status in case per million + rename functions + fix functions doc
This commit is contained in:
parent
88854c8631
commit
fc7b84da28
216
drees.py
216
drees.py
@ -61,6 +61,10 @@ class Field(DreesEnum):
|
||||
DC = (2, "Décés")
|
||||
|
||||
|
||||
class Quota(DreesEnum):
    """
    Non-indicator columns stored alongside the Field indicators.

    Member indices start right after the last Field index, so a Quota can
    be used on the same array axis as a Field without colliding with it
    (presumably the first tuple element is the index and the second the
    display label, as for the other DreesEnum classes; confirm in DreesEnum).
    """

    # first slot after the Field members
    EFFECTIF = (len(Field), "Effectif")
|
||||
|
||||
|
||||
class VacStatus(DreesEnum):
|
||||
NC = (0, "Non-vaccinés")
|
||||
PDR = (1, "Primo dose récente")
|
||||
@ -81,6 +85,7 @@ class AgeGroup(DreesEnum):
|
||||
VERY_OLD = (4, "[80;+]")
|
||||
|
||||
|
||||
# namedtuple used to store stats (could be better...)
|
||||
VaccineMean = namedtuple("VaccineMean", ["age", "field", "percent"])
|
||||
AgeMean = namedtuple("AgeMean", ["age", "field", "percent"])
|
||||
|
||||
@ -92,6 +97,7 @@ def get_data(
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
collect covid data by age from DREES
|
||||
src: DATA_URL
|
||||
"""
|
||||
os.makedirs(DATA_REPOSITORY, exist_ok=True)
|
||||
data_url = DATA_URL.format(extension=extension)
|
||||
@ -130,11 +136,14 @@ def get_enum_field(value):
|
||||
for field in Field:
|
||||
if field.name.lower() == value:
|
||||
return field.value
|
||||
for quota in Quota:
|
||||
if quota.name.lower() == value:
|
||||
return quota.value
|
||||
|
||||
|
||||
def group_by_age_date(data: Dict[str, Any]) -> Dict[dt, Any]:
|
||||
def structure_data(data: Dict[str, Any]) -> Dict[dt, Any]:
|
||||
"""
|
||||
group the original dictionnary into a more readable one
|
||||
structure the original dictionary into a more readable one
|
||||
'date': {
|
||||
'age' : {
|
||||
'vac_status' : {
|
||||
@ -147,35 +156,38 @@ def group_by_age_date(data: Dict[str, Any]) -> Dict[dt, Any]:
|
||||
}
|
||||
"""
|
||||
logging.info("restructuring the data...")
|
||||
dic_data_grouped: Dict[dt, Any] = OrderedDict()
|
||||
dic_data: Dict[dt, Any] = OrderedDict()
|
||||
for row in data["records"]:
|
||||
row_fields = row["fields"]
|
||||
date = dt.strptime(row_fields["date"], DATE_FORMAT)
|
||||
age = row_fields["age"]
|
||||
vac_status = row_fields["vac_statut"]
|
||||
if date not in dic_data_grouped:
|
||||
dic_data_grouped[date] = OrderedDict()
|
||||
if age not in dic_data_grouped[date]:
|
||||
dic_data_grouped[date][age] = OrderedDict()
|
||||
if vac_status not in dic_data_grouped[date][age]:
|
||||
dic_data_grouped[date][age][vac_status] = OrderedDict()
|
||||
if date not in dic_data:
|
||||
dic_data[date] = OrderedDict()
|
||||
if age not in dic_data[date]:
|
||||
dic_data[date][age] = OrderedDict()
|
||||
if vac_status not in dic_data[date][age]:
|
||||
dic_data[date][age][vac_status] = OrderedDict()
|
||||
for field in Field:
|
||||
field_name = field.name.lower()
|
||||
dic_data_grouped[date][age][vac_status][field_name] = row_fields[field_name]
|
||||
dic_data[date][age][vac_status][field_name] = row_fields[field_name]
|
||||
for quota in Quota:
|
||||
quota_name = quota.name.lower()
|
||||
dic_data[date][age][vac_status][quota_name] = row_fields[quota_name]
|
||||
logging.info("data restructured")
|
||||
return dic_data_grouped
|
||||
return dic_data
|
||||
|
||||
|
||||
def get_np_data(dic_data_grouped: Dict[dt, Any]) -> Tuple[np.ndarray, np.ndarray]:
|
||||
def get_np_data(dic_data: Dict[dt, Any]) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
store the data in numpy data structure helped by Enum
|
||||
store the data in numpy data structure
|
||||
"""
|
||||
logging.info("storing data in numpy data structure...")
|
||||
np_data = np.empty(
|
||||
(len(dic_data_grouped), len(AgeGroup), len(VacStatus), len(Field))
|
||||
(len(dic_data), len(AgeGroup), len(VacStatus), len(Field) + len(Quota))
|
||||
)
|
||||
np_date = np.empty((len(dic_data_grouped)), dtype="datetime64[s]")
|
||||
for idx_date, (date, dic_age) in enumerate(dic_data_grouped.items()):
|
||||
np_date = np.empty((len(dic_data)), dtype="datetime64[s]")
|
||||
for idx_date, (date, dic_age) in enumerate(dic_data.items()):
|
||||
np_date[idx_date] = date
|
||||
for age, dic_vac in dic_age.items():
|
||||
idx_age = get_enum_age(age)
|
||||
@ -192,33 +204,38 @@ def get_np_data(dic_data_grouped: Dict[dt, Any]) -> Tuple[np.ndarray, np.ndarray
|
||||
|
||||
|
||||
def split_by_vac_status(np_data: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Split the data into a vaccinated part and an unvaccinated part.

    The third axis (axis 2) of np_data is indexed by vaccine status.

    Returns a pair (vaccinated, unvaccinated):
    - vaccinated: sum over axis 2 of every status from index 1 onward
    - unvaccinated: the slice of axis 2 at VacStatus.NC

    Both results have one axis fewer than np_data.
    """
    # index 0 is skipped here; this relies on VacStatus.NC being index 0
    vaccinated = np.sum(np_data[:, :, 1:, :], axis=2)
    unvaccinated = np_data[:, :, VacStatus.NC.value, :]
    return vaccinated, unvaccinated
|
||||
|
||||
|
||||
def get_vaccine_percent(np_data: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
|
||||
def get_vaccine_status_distribution(
|
||||
np_data: np.ndarray,
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
get the vaccine percent per date, age and field
|
||||
get the vaccine distribution over the whole time period by age and field
|
||||
the vaccine data holds all the vaccine status except unvaccine
|
||||
"""
|
||||
np_data_vac, np_data_unvac = split_by_vac_status(np_data)
|
||||
np_percent_vac = np_data_vac / np.sum(np_data, axis=2)
|
||||
np_percent_unvac = np_data_unvac / np.sum(np_data, axis=2)
|
||||
return np_percent_vac, np_percent_unvac
|
||||
np_vac_distri = np_data_vac / np.sum(np_data, axis=2)
|
||||
np_unvac_distri = np_data_unvac / np.sum(np_data, axis=2)
|
||||
return np_vac_distri, np_unvac_distri
|
||||
|
||||
|
||||
def get_percent_age_by_date_field_vac_splited(
|
||||
def get_distribution_age_by_field_and_vac_status(
|
||||
np_data: np.ndarray, field: Field
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
get numpy percent age grouped by date and field splited by vaccine status
|
||||
get distribution age (percent) by field grouped by vaccine status
|
||||
"""
|
||||
np_percent_age_vac = np.empty((len(np_data), len(AgeGroup)))
|
||||
np_percent_age_unvac = np.copy(np_percent_age_vac)
|
||||
np_age_vac_percent = np.empty((len(np_data), len(AgeGroup)))
|
||||
np_age_unvac_percent = np.copy(np_age_vac_percent)
|
||||
np_data_vac, np_data_unvac = split_by_vac_status(np_data)
|
||||
for idx_date in range(len(np_data_vac)):
|
||||
sum_effectif = np.nansum(np_data_vac[idx_date, :, field.value])
|
||||
for age_group in AgeGroup:
|
||||
np_percent_age_vac[idx_date, age_group.value] = np.round(
|
||||
np_age_vac_percent[idx_date, age_group.value] = np.round(
|
||||
(np_data_vac[idx_date, age_group.value, field.value] / sum_effectif)
|
||||
* 100,
|
||||
2,
|
||||
@ -226,17 +243,17 @@ def get_percent_age_by_date_field_vac_splited(
|
||||
for idx_date in range(len(np_data_unvac)):
|
||||
sum_effectif = np.nansum(np_data_unvac[idx_date, :, field.value])
|
||||
for age_group in AgeGroup:
|
||||
np_percent_age_unvac[idx_date, age_group.value] = np.round(
|
||||
np_age_unvac_percent[idx_date, age_group.value] = np.round(
|
||||
(np_data_unvac[idx_date, age_group.value, field.value] / sum_effectif)
|
||||
* 100,
|
||||
2,
|
||||
)
|
||||
return np_percent_age_vac, np_percent_age_unvac
|
||||
return np_age_vac_percent, np_age_unvac_percent
|
||||
|
||||
|
||||
def get_percent_age_by_date_field(np_data: np.ndarray, field: Field) -> np.ndarray:
|
||||
def get_distribution_age_by_field(np_data: np.ndarray, field: Field) -> np.ndarray:
|
||||
"""
|
||||
get numpy percent age grouped by date and field
|
||||
get age distribution (percent) over the whole period by field
|
||||
"""
|
||||
np_percent_age = np.empty((len(np_data), len(AgeGroup)))
|
||||
for idx_date in range(len(np_data)):
|
||||
@ -291,33 +308,38 @@ def save_and_close_fig(
|
||||
|
||||
def analyse(np_data: np.ndarray) -> List[Union[VaccineMean, AgeMean]]:
|
||||
"""
|
||||
analyse data
|
||||
analyse DREES dataset
|
||||
useful stats can be compute here if no plots needed
|
||||
"""
|
||||
logging.info("analysing data...")
|
||||
lst_analyse_data: List[Union[VaccineMean, AgeMean]] = list()
|
||||
np_percent_vac, _ = get_vaccine_percent(np_data)
|
||||
np_vac_distri, _ = get_vaccine_status_distribution(np_data)
|
||||
|
||||
logging.info("--- field by age vaccine mean percent ---")
|
||||
logging.info(
|
||||
"--- field distribution by age and only vaccine status (averaged over the whole period) ---"
|
||||
)
|
||||
for age_group in AgeGroup:
|
||||
for field in Field:
|
||||
mean_vac_percent = np.round(
|
||||
np.nanmean(np_percent_vac[:, age_group.value, field.value]) * 100, 2
|
||||
vac_percent_mean = np.round(
|
||||
np.nanmean(np_vac_distri[:, age_group.value, field.value]) * 100, 2
|
||||
)
|
||||
print(f"{field.name} - {age_group.label} - vac : {mean_vac_percent}%")
|
||||
print(f"{field.name} - {age_group.label} - vac : {vac_percent_mean}%")
|
||||
lst_analyse_data.append(
|
||||
VaccineMean(age_group.label, field.label, mean_vac_percent)
|
||||
VaccineMean(age_group.label, field.label, vac_percent_mean)
|
||||
)
|
||||
|
||||
logging.info("--- age by field and vac status mean percent ---")
|
||||
logging.info(
|
||||
"--- age distribution by field and vac status (averaged over the whole period) ---"
|
||||
)
|
||||
for field in Field:
|
||||
np_percent_age = get_percent_age_by_date_field(np_data, field)
|
||||
np_age_percent = get_distribution_age_by_field(np_data, field)
|
||||
(
|
||||
np_percent_age_vac,
|
||||
np_percent_age_unvac,
|
||||
) = get_percent_age_by_date_field_vac_splited(np_data, field)
|
||||
) = get_distribution_age_by_field_and_vac_status(np_data, field)
|
||||
for age_group in AgeGroup:
|
||||
percent_age_mean = np.round(
|
||||
np.nanmean(np_percent_age[:, age_group.value]), 2
|
||||
np.nanmean(np_age_percent[:, age_group.value]), 2
|
||||
)
|
||||
print(f"age: {age_group.label} - field: {field.name} = {percent_age_mean}%")
|
||||
lst_analyse_data.append(
|
||||
@ -340,18 +362,21 @@ def analyse(np_data: np.ndarray) -> List[Union[VaccineMean, AgeMean]]:
|
||||
return lst_analyse_data
|
||||
|
||||
|
||||
def plot_bar_age_percent_vac_status_by_field(
|
||||
np_data_vac_status: np.ndarray,
|
||||
def plot_bar_age_distribution_by_field_and_vac_status(
|
||||
np_data: np.ndarray,
|
||||
np_date: np.ndarray,
|
||||
field: Field,
|
||||
is_vac: Optional[bool] = True,
|
||||
) -> None:
|
||||
"""
|
||||
plot age distribution (percent) by field and vaccine status
|
||||
"""
|
||||
fig, ax = get_plot_fig(figsize=(22, 8), locator=md.WeekdayLocator())
|
||||
bottom = np_data_vac_status[:, 0]
|
||||
bottom = np_data[:, 0]
|
||||
suffix = "vac" if is_vac else "unvac"
|
||||
title = "Vaccinés" if is_vac else "Non vaccinés"
|
||||
for age_group in AgeGroup:
|
||||
percents_age = np_data_vac_status[:, age_group.value]
|
||||
percents_age = np_data[:, age_group.value]
|
||||
if age_group.value > 0:
|
||||
ax.bar(
|
||||
np_date,
|
||||
@ -381,25 +406,30 @@ def plot_bar_age_percent_vac_status_by_field(
|
||||
)
|
||||
|
||||
|
||||
def plot_bar_age_percent_by_field(
|
||||
def plot_bar_age_distribution_by_field(
|
||||
np_data: np.ndarray, np_date: np.ndarray, field: Field
|
||||
) -> None:
|
||||
"""
|
||||
plot percent vaccinated field group by age bar diagram
|
||||
plot age distribution (percent) by field
|
||||
"""
|
||||
(
|
||||
np_percent_age_vac,
|
||||
np_percent_age_unvac,
|
||||
) = get_percent_age_by_date_field_vac_splited(np_data, field)
|
||||
plot_bar_age_percent_vac_status_by_field(np_percent_age_vac, np_date, field)
|
||||
plot_bar_age_percent_vac_status_by_field(
|
||||
np_percent_age_unvac, np_date, field, is_vac=False
|
||||
np_age_vac_percent,
|
||||
np_age_unvac_percent,
|
||||
) = get_distribution_age_by_field_and_vac_status(np_data, field)
|
||||
plot_bar_age_distribution_by_field_and_vac_status(
|
||||
np_age_vac_percent, np_date, field
|
||||
)
|
||||
plot_bar_age_distribution_by_field_and_vac_status(
|
||||
np_age_unvac_percent, np_date, field, is_vac=False
|
||||
)
|
||||
|
||||
|
||||
def plot_cumulative_field(
|
||||
np_data: np.ndarray, np_date: np.ndarray, field: Field
|
||||
) -> None:
|
||||
"""
|
||||
plot cumulative field by age and vaccine status (cases per million)
|
||||
"""
|
||||
np_data_vac, np_data_unvac = split_by_vac_status(np_data)
|
||||
for age_group in AgeGroup:
|
||||
fig, _ = get_plot_fig()
|
||||
@ -413,8 +443,8 @@ def plot_cumulative_field(
|
||||
plt.plot(np_date, np_cumulate_unvac, label=f"Non vaccinés")
|
||||
|
||||
plt.title(f"{age_group.label} - {field.label}")
|
||||
plt.xlabel("date")
|
||||
plt.ylabel("nombre")
|
||||
plt.xlabel("Date")
|
||||
plt.ylabel("Nombre de cas")
|
||||
save_and_close_fig(
|
||||
fig,
|
||||
os.path.join(
|
||||
@ -428,18 +458,20 @@ def plot_fields_by_age_vac(
|
||||
np_data: np.ndarray, np_date: np.ndarray, age_group: AgeGroup, vac_status: VacStatus
|
||||
) -> None:
|
||||
"""
|
||||
plot data by vaccine status, age and field
|
||||
plot field data by age and vaccine status (cases per million)
|
||||
"""
|
||||
fig, _ = get_plot_fig()
|
||||
|
||||
for field in Field:
|
||||
plt.plot(
|
||||
np_date,
|
||||
np_data[:, age_group.value, vac_status.value, field.value],
|
||||
10e6
|
||||
* np_data[:, age_group.value, vac_status.value, field.value]
|
||||
/ np_data[:, age_group.value, vac_status.value, Quota.EFFECTIF.value],
|
||||
label=f"{field.label}",
|
||||
)
|
||||
plt.xlabel("date")
|
||||
plt.ylabel("nombre")
|
||||
plt.xlabel("Date")
|
||||
plt.ylabel("Cas par million de personnes")
|
||||
plt.title(f"{age_group.label} - {vac_status.label}")
|
||||
|
||||
save_and_close_fig(
|
||||
@ -450,26 +482,25 @@ def plot_fields_by_age_vac(
|
||||
)
|
||||
|
||||
|
||||
def plot_bar_data_by_age_field(
|
||||
def plot_bar_vaccine_status_distribution_by_age_field(
|
||||
np_data: np.ndarray,
|
||||
np_date: np.ndarray,
|
||||
age_group: AgeGroup,
|
||||
field: Field,
|
||||
) -> None:
|
||||
"""
|
||||
display a bar graph by field and age over the data period
|
||||
bars display vaccine status percent
|
||||
display vaccine/unvaccine distribution (percent) over the whole period by age and field
|
||||
"""
|
||||
np_percent_vac, np_percent_unvac = get_vaccine_percent(np_data)
|
||||
np_vac_distri, np_unvac_distri = get_vaccine_status_distribution(np_data)
|
||||
# adjust the fig size to display correctly bars and labels
|
||||
fig, ax = get_plot_fig(figsize=(22, 8))
|
||||
|
||||
for idx_date in range(len(np_date)):
|
||||
vac_percent = np.round(
|
||||
np_percent_vac[idx_date, age_group.value, field.value] * 100, 2
|
||||
np_vac_distri[idx_date, age_group.value, field.value] * 100, 2
|
||||
)
|
||||
unvac_percent = np.round(
|
||||
np_percent_unvac[idx_date, age_group.value, field.value] * 100, 2
|
||||
np_unvac_distri[idx_date, age_group.value, field.value] * 100, 2
|
||||
)
|
||||
bar_vac = ax.bar(idx_date, vac_percent, color="b", label="Vaccinés")
|
||||
ax.bar(
|
||||
@ -505,7 +536,7 @@ def plot_bar_data_by_age_field(
|
||||
|
||||
def get_age_vac_args() -> List[Tuple[AgeGroup, VacStatus]]:
|
||||
"""
|
||||
get tuple arguments to plot fields data by age and vac status on multiprocess
|
||||
build pool age and vac status arguments
|
||||
"""
|
||||
pool_args: List[Tuple[AgeGroup, VacStatus]] = list()
|
||||
for age_group in AgeGroup:
|
||||
@ -516,7 +547,7 @@ def get_age_vac_args() -> List[Tuple[AgeGroup, VacStatus]]:
|
||||
|
||||
def get_age_field_args() -> List[Tuple[AgeGroup, Field]]:
|
||||
"""
|
||||
get tuple arguments to plot fields data by age and field on multiprocess
|
||||
build pool age and field arguments
|
||||
"""
|
||||
pool_args: List[Tuple[AgeGroup, Field]] = list()
|
||||
for age_group in AgeGroup:
|
||||
@ -525,6 +556,16 @@ def get_age_field_args() -> List[Tuple[AgeGroup, Field]]:
|
||||
return pool_args
|
||||
|
||||
|
||||
def get_field_args() -> List[Tuple[Field]]:
    """
    Build the pool arguments for per-field plot functions.

    Returns a list with one 1-tuple per Field member, in enum iteration
    order, so each entry can be unpacked as the single remaining positional
    argument of a starmap call.
    """
    return [(field,) for field in Field]
|
||||
|
||||
|
||||
def move_tmp_plots() -> None:
|
||||
"""
|
||||
move .tmp.png plots into .png after generation
|
||||
@ -567,12 +608,15 @@ def generate_html_page(
|
||||
|
||||
if __name__ == "__main__":
|
||||
"""
|
||||
This script aims to plot DRESS data
|
||||
This script aims to analyse and plot DREES data
|
||||
Stats available:
|
||||
- Age distribution (percent) by field (vaccine and unvaccine)
|
||||
- Vaccine/unvaccine distribution (percent) by field and age
|
||||
Plots available:
|
||||
- cumulative deaths by age
|
||||
- hc, sc, dc by vaccine status and age
|
||||
- hc, sc, dc (vaccine/unvaccine percent) by age
|
||||
- hc, sc, dc (age grouped percent) by field
|
||||
- cumulative hc, sc, dc by age and vaccine status
|
||||
- hc, sc, dc by vaccine status and age (cases per million)
|
||||
- hc, sc, dc (vaccine/unvaccine percent distribution) by age
|
||||
- hc, sc, dc (age percent distribution) by field
|
||||
Main indicators are :
|
||||
- hospitalisations (hc)
|
||||
- criticals (sc)
|
||||
@ -605,27 +649,29 @@ if __name__ == "__main__":
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
dic_data: Dict[str, Any] = get_data(
|
||||
dic_data_unstructured: Dict[str, Any] = get_data(
|
||||
file_path=os.path.join(DATA_REPOSITORY, "dress.json"), refresh=args.refresh
|
||||
)
|
||||
dic_data_grouped: Dict[dt, Any] = group_by_age_date(dic_data)
|
||||
np_data, np_date = get_np_data(dic_data_grouped)
|
||||
dic_data: Dict[dt, Any] = structure_data(dic_data_unstructured)
|
||||
np_data, np_date = get_np_data(dic_data)
|
||||
|
||||
lst_analyse_data = analyse(np_data)
|
||||
|
||||
if not args.no_plot:
|
||||
os.makedirs(OUTPUT_REPOSITORY, exist_ok=True)
|
||||
plot_fields_args = get_age_vac_args()
|
||||
f_fields = partial(plot_fields_by_age_vac, np_data, np_date)
|
||||
plot_vac_percent_age_args = get_age_field_args()
|
||||
f_bars = partial(plot_bar_data_by_age_field, np_data, np_date)
|
||||
with Pool(2) as pool:
|
||||
pool.starmap(f_fields, plot_fields_args)
|
||||
pool.starmap(f_bars, plot_vac_percent_age_args)
|
||||
|
||||
for field in Field:
|
||||
plot_cumulative_field(np_data, np_date, field)
|
||||
plot_bar_age_percent_by_field(np_data, np_date, field)
|
||||
f_fields = partial(plot_fields_by_age_vac, np_data, np_date)
|
||||
f_bars_vaccine = partial(
|
||||
plot_bar_vaccine_status_distribution_by_age_field, np_data, np_date
|
||||
)
|
||||
f_bars_age = partial(plot_bar_age_distribution_by_field, np_data, np_date)
|
||||
f_cumulate = partial(plot_cumulative_field, np_data, np_date)
|
||||
|
||||
with Pool(2) as pool:
|
||||
pool.starmap(f_fields, get_age_vac_args())
|
||||
pool.starmap(f_bars_vaccine, get_age_field_args())
|
||||
pool.starmap(f_bars_age, get_field_args())
|
||||
pool.starmap(f_cumulate, get_field_args())
|
||||
|
||||
move_tmp_plots()
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user