add fields plot by vac status grouped by age
This commit is contained in:
		
							parent
							
								
									9c657a7887
								
							
						
					
					
						commit
						2357303295
					
				
							
								
								
									
										153
									
								
								drees.py
									
									
									
									
									
								
							
							
						
						
									
										153
									
								
								drees.py
									
									
									
									
									
								
							| @ -22,6 +22,15 @@ DATA_URL = "https://data.drees.solidarites-sante.gouv.fr/api/records/1.0/search/ | |||||||
| DATA_REPOSITORY = "data" | DATA_REPOSITORY = "data" | ||||||
| OUTPUT_REPOSITORY = "output" | OUTPUT_REPOSITORY = "output" | ||||||
| 
 | 
 | ||||||
# Fixed bar colors for the age-group plots, looked up by integer index
# (AGE_COLORS[age_group.value]). A matplotlib cycler could do this too, but
# a hand-picked table is good enough for the handful of age groups.
AGE_COLORS = dict(enumerate(("pink", "green", "blue", "red", "gray")))
|  | 
 | ||||||
| 
 | 
 | ||||||
| class DreesEnum(bytes, Enum): | class DreesEnum(bytes, Enum): | ||||||
|     def __new__(cls, value, label): |     def __new__(cls, value, label): | ||||||
| @ -65,7 +74,6 @@ def get_data( | |||||||
|     """ |     """ | ||||||
|     collect covid data by age from DREES |     collect covid data by age from DREES | ||||||
|     """ |     """ | ||||||
|     logging.info("fetching data...") |  | ||||||
|     os.makedirs(DATA_REPOSITORY, exist_ok=True) |     os.makedirs(DATA_REPOSITORY, exist_ok=True) | ||||||
|     data_url = DATA_URL.format(extension=extension) |     data_url = DATA_URL.format(extension=extension) | ||||||
|     if data_url.endswith("/"): |     if data_url.endswith("/"): | ||||||
| @ -76,12 +84,14 @@ def get_data( | |||||||
|         else file_path |         else file_path | ||||||
|     ) |     ) | ||||||
|     if not os.path.isfile(file_path) or refresh: |     if not os.path.isfile(file_path) or refresh: | ||||||
|  |         logging.info("fetching data...") | ||||||
|         r = requests.get(data_url) |         r = requests.get(data_url) | ||||||
|         if not r.content: |         if not r.content: | ||||||
|             raise ValueError("no data provided froim the url : {}".format(data_url)) |             raise ValueError("no data provided froim the url : {}".format(data_url)) | ||||||
|         with open(file_path, "wb") as f: |         with open(file_path, "wb") as f: | ||||||
|             f.write(r.content) |             f.write(r.content) | ||||||
|             return json.loads(r.content) |             return json.loads(r.content) | ||||||
|  |     logging.info(f"opening {file_path}...") | ||||||
|     return json.load(open(file_path, "rb")) |     return json.load(open(file_path, "rb")) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -160,6 +170,49 @@ def get_np_data(dic_data_grouped: Dict[dt, Any]) -> Tuple[np.ndarray, np.ndarray | |||||||
|     return np_data, np_date |     return np_data, np_date | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
def split_by_vac_status(np_data: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    split the data into vaccinated / unvaccinated along the vaccine status axis

    np_data is indexed [date, age group, vaccine status, field]; the status
    axis (axis 2) is removed from both results, which are indexed
    [date, age group, field].

    Returns:
        (np_data_vac, np_data_unvac): the sum over every status except
        VacStatus.NC, and the VacStatus.NC slice (a view on np_data).
    """
    idx_unvac = VacStatus.NC.value
    # Remove the unvaccinated slot by its enum value instead of a literal
    # `1:` slice, so both halves agree wherever NC sits on the status axis.
    np_data_vac = np.sum(np.delete(np_data, idx_unvac, axis=2), axis=2)
    return np_data_vac, np_data[:, :, idx_unvac, :]
|  | 
 | ||||||
|  | 
 | ||||||
def get_vaccine_percent(np_data: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    get the vaccinated / unvaccinated share per date, age and field

    the vaccinated data holds all the vaccine statuses except unvaccinated.
    Despite the name, the returned values are ratios of the all-status total
    and are NOT multiplied by 100. Where that total is 0 the division yields
    NaN (or inf), it is not replaced here.

    Returns:
        (np_percent_vac, np_percent_unvac), indexed [date, age group, field]
    """
    np_data_vac, np_data_unvac = split_by_vac_status(np_data)
    # both ratios share the same denominator: reduce the status axis once
    np_total = np.sum(np_data, axis=2)
    return np_data_vac / np_total, np_data_unvac / np_total
|  | 
 | ||||||
|  | 
 | ||||||
def _percent_by_age(np_counts: np.ndarray, field: Field) -> np.ndarray:
    """
    share (in %, rounded to 2 decimals) of each age group in the per-date
    total of one field; np_counts is indexed [date, age group, field]
    """
    counts = np_counts[:, :, field.value]
    # NaN counts are ignored in the per-date total (their own share stays NaN)
    totals = np.nansum(counts, axis=1, keepdims=True)
    idx_ages = [age_group.value for age_group in AgeGroup]
    # NaN-filled rather than np.empty: no uninitialised memory can leak out
    np_percent = np.full((len(np_counts), len(AgeGroup)), np.nan)
    np_percent[:, idx_ages] = np.round(counts[:, idx_ages] / totals * 100, 2)
    return np_percent


def get_percent_age_by_date_field(
    np_data: np.ndarray, field: Field
) -> Tuple[np.ndarray, np.ndarray]:
    """
    get the age group shares of a field for each date, split by vaccine status

    For every date, each age group's count is expressed as a percentage of
    the sum over all age groups, separately for the vaccinated and the
    unvaccinated data. A date whose total is 0 gives NaN/inf shares.

    Returns:
        (np_percent_age_vac, np_percent_age_unvac), each of shape
        (number of dates, number of age groups)
    """
    np_data_vac, np_data_unvac = split_by_vac_status(np_data)
    return _percent_by_age(np_data_vac, field), _percent_by_age(np_data_unvac, field)
|  | 
 | ||||||
|  | 
 | ||||||
| def get_plot_fig( | def get_plot_fig( | ||||||
|     grid: Optional[bool] = True, |     grid: Optional[bool] = True, | ||||||
|     date_format: Optional[str] = DATE_FORMAT, |     date_format: Optional[str] = DATE_FORMAT, | ||||||
| @ -193,8 +246,68 @@ def save_and_close_fig( | |||||||
|     logging.info(f"{output_path} plotted") |     logging.info(f"{output_path} plotted") | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
def analyse(np_data: np.ndarray, np_date: np.ndarray) -> None:
    """
    print, for every age group and field, the mean over the dates of the
    vaccinated share (in %, rounded to 2 decimals, NaN dates ignored)

    np_date is accepted but not used here.
    """
    logging.info("analysing data...")
    vac_share = get_vaccine_percent(np_data)[0]

    logging.info("--- vaccine mean percent ---")
    for age in AgeGroup:
        for fld in Field:
            share_over_dates = vac_share[:, age.value, fld.value]
            mean_vac_percent = np.round(np.nanmean(share_over_dates) * 100, 2)
            print(f"{fld.label} - {age.label} - vac : {mean_vac_percent}%")
|  | 
 | ||||||
|  | 
 | ||||||
def plot_bar_age_percent_vac_status_by_field(
    np_data_vac_status: np.ndarray,
    field: Field,
    is_vac: Optional[bool] = True,
    dates: Optional[np.ndarray] = None,
) -> None:
    """
    plot the age group shares of a field as stacked bars, one bar per date

    Args:
        np_data_vac_status: percentages indexed [date, age group value];
            it is only read, never modified.
        field: field being plotted (used for the title and the file name).
        is_vac: True for the vaccinated data, False for the unvaccinated
            data; only changes the title and the output file name.
        dates: x positions of the bars. When omitted, the module-level
            np_date is used, as this function has always done.
    """
    x_dates = np_date if dates is None else dates
    fig, ax = get_plot_fig(figsize=(22, 8))
    title = "vac" if is_vac else "no vac"

    # Stack on a private accumulator. Starting from the
    # np_data_vac_status[:, 0] view and doing `+=` overwrote column 0 of
    # the caller's array with the running total.
    bottom = np.zeros(len(np_data_vac_status), dtype=float)
    for age_group in AgeGroup:
        percents_age = np_data_vac_status[:, age_group.value]
        ax.bar(
            x_dates,
            percents_age,
            label=age_group.label,
            bottom=bottom,
            color=AGE_COLORS[age_group.value],
        )
        bottom = bottom + percents_age

    # axis decorations do not depend on the age group: set them once
    ax.set_ylabel("%")
    ax.set_title(f"{field.label} - {title}")
    plt.legend(
        [age_group.label for age_group in AgeGroup], loc="upper right", frameon=True
    )
    save_and_close_fig(
        fig,
        os.path.join(OUTPUT_REPOSITORY, f"age_percent_{title}_{field.label}"),
        has_legend=False,
    )
|  | 
 | ||||||
|  | 
 | ||||||
def plot_bar_age_percent_by_field(np_data: np.ndarray, field: Field) -> None:
    """
    draw the age group share bar diagrams of a field: one figure for the
    vaccinated data, then one for the unvaccinated data
    """
    percents_by_status = get_percent_age_by_date_field(np_data, field)
    for np_percent_age, is_vac in zip(percents_by_status, (True, False)):
        plot_bar_age_percent_vac_status_by_field(np_percent_age, field, is_vac=is_vac)
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def plot_cumulative_field( | def plot_cumulative_field( | ||||||
| @ -210,8 +323,8 @@ def plot_cumulative_field( | |||||||
|         np_cumulate_unvac: np.ndarray = np.cumsum( |         np_cumulate_unvac: np.ndarray = np.cumsum( | ||||||
|             np_data_unvac[:, age_group.value, field.value], axis=0 |             np_data_unvac[:, age_group.value, field.value], axis=0 | ||||||
|         ) |         ) | ||||||
|         plt.plot(np_date, np_cumulate_vac, label=f"{age_group.label} vax") |         plt.plot(np_date, np_cumulate_vac, label=f"{age_group.label} vac") | ||||||
|         plt.plot(np_date, np_cumulate_unvac, label=f"{age_group.label} no vax") |         plt.plot(np_date, np_cumulate_unvac, label=f"{age_group.label} no vac") | ||||||
| 
 | 
 | ||||||
|     plt.title(f"nombre de {field.label} cumulé par age") |     plt.title(f"nombre de {field.label} cumulé par age") | ||||||
|     plt.xlabel("date") |     plt.xlabel("date") | ||||||
| @ -244,17 +357,6 @@ def plot_fields_by_age_vac( | |||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def get_vaccine_percent(np_data: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: |  | ||||||
|     """ |  | ||||||
|     get the vaccine percent per date, age and field |  | ||||||
|     the vaccine data holds all the vaccine status except unvaccine |  | ||||||
|     """ |  | ||||||
|     np_data_vac, np_data_unvac = split_by_vac_status(np_data) |  | ||||||
|     np_percent_vac = np_data_vac / np.sum(np_data, axis=2) |  | ||||||
|     np_percent_unvac = np_data_unvac / np.sum(np_data, axis=2) |  | ||||||
|     return np_percent_vac, np_percent_unvac |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def plot_bar_data_by_age_field( | def plot_bar_data_by_age_field( | ||||||
|     np_data: np.ndarray, |     np_data: np.ndarray, | ||||||
|     np_date: np.ndarray, |     np_date: np.ndarray, | ||||||
| @ -272,14 +374,16 @@ def plot_bar_data_by_age_field( | |||||||
| 
 | 
 | ||||||
|     for idx_date in range(len(np_date)): |     for idx_date in range(len(np_date)): | ||||||
|         vac_percent = np.round( |         vac_percent = np.round( | ||||||
|             np_percent_vac[idx_date, age_group.value, field.value] * 100 |             np_percent_vac[idx_date, age_group.value, field.value] * 100, 2 | ||||||
|         ) |         ) | ||||||
|         unvac_percent = np.round( |         unvac_percent = np.round( | ||||||
|             np_percent_unvac[idx_date, age_group.value, field.value] * 100 |             np_percent_unvac[idx_date, age_group.value, field.value] * 100, 2 | ||||||
|  |         ) | ||||||
|  |         bar_vac = ax.bar(idx_date, vac_percent, color="b", label="vac") | ||||||
|  |         ax.bar(idx_date, unvac_percent, bottom=vac_percent, color="r", label="no vac") | ||||||
|  |         ax.bar_label( | ||||||
|  |             bar_vac, label_type="edge", color="black", fontsize="7", fmt="%.0f" | ||||||
|         ) |         ) | ||||||
|         bar_vac = ax.bar(idx_date, vac_percent, color="b", label="vax") |  | ||||||
|         ax.bar(idx_date, unvac_percent, bottom=vac_percent, color="r", label="no vax") |  | ||||||
|         ax.bar_label(bar_vac, label_type="edge", color="black", fontsize="8") |  | ||||||
|         ax.set_ylim(top=105)  # to display 100% label |         ax.set_ylim(top=105)  # to display 100% label | ||||||
| 
 | 
 | ||||||
|     ax.set_ylabel("%") |     ax.set_ylabel("%") | ||||||
| @ -293,7 +397,7 @@ def plot_bar_data_by_age_field( | |||||||
|             for idx, d in enumerate(np_date.astype(dt)) |             for idx, d in enumerate(np_date.astype(dt)) | ||||||
|         ], |         ], | ||||||
|     ) |     ) | ||||||
|     plt.legend(["vax", "no vax"], loc=0, frameon=True) |     plt.legend(["vac", "no vac"], loc="upper right", frameon=True) | ||||||
| 
 | 
 | ||||||
|     save_and_close_fig( |     save_and_close_fig( | ||||||
|         fig, |         fig, | ||||||
| @ -331,10 +435,12 @@ if __name__ == "__main__": | |||||||
|         - cumulative deaths by age |         - cumulative deaths by age | ||||||
|         - hc, sc, dc by vaccine status and age |         - hc, sc, dc by vaccine status and age | ||||||
|         - hc, sc, dc (vaccine/unvaccine percent) by age |         - hc, sc, dc (vaccine/unvaccine percent) by age | ||||||
|  |         - hc, sc, dc (age grouped percent) by field | ||||||
|     Main indicators are : |     Main indicators are : | ||||||
|         - hospitalisations (hc) |         - hospitalisations (hc) | ||||||
|         - criticals (sc) |         - criticals (sc) | ||||||
|         - deaths (dc) |         - deaths (dc) | ||||||
|  |     hc, sc, dc include positive PCR tests | ||||||
|     """ |     """ | ||||||
| 
 | 
 | ||||||
|     parser = argparse.ArgumentParser() |     parser = argparse.ArgumentParser() | ||||||
| @ -357,6 +463,8 @@ if __name__ == "__main__": | |||||||
| 
 | 
 | ||||||
|     np_data, np_date = get_np_data(dic_data_grouped) |     np_data, np_date = get_np_data(dic_data_grouped) | ||||||
| 
 | 
 | ||||||
|  |     analyse(np_data, np_date) | ||||||
|  | 
 | ||||||
|     plot_fields_args = get_age_vac_args() |     plot_fields_args = get_age_vac_args() | ||||||
|     f_fields = partial(plot_fields_by_age_vac, np_data, np_date) |     f_fields = partial(plot_fields_by_age_vac, np_data, np_date) | ||||||
|     plot_vac_percent_age_args = get_age_field_args() |     plot_vac_percent_age_args = get_age_field_args() | ||||||
| @ -367,3 +475,4 @@ if __name__ == "__main__": | |||||||
| 
 | 
 | ||||||
|     for field in Field: |     for field in Field: | ||||||
|         plot_cumulative_field(np_data, np_date, field) |         plot_cumulative_field(np_data, np_date, field) | ||||||
|  |         plot_bar_age_percent_by_field(np_data, field) | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user