Skip to content

Commit 22c8589

Browse files
committed
Update SQL table definitions and add extra NA values to historic data script
1 parent 5662a79 commit 22c8589

File tree

2 files changed

+89
-26
lines changed

2 files changed

+89
-26
lines changed

src/acquisition/rvdss/pull_historic.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,8 @@ def fetch_one_season_from_report(url):
414414

415415
# Read table, coding all the abbreviations for missing data into NA
416416
# Also use dropna because removing footers causes the html to have an empty row
417-
na_values = ['N.A.','N.A', 'N.C.','N.R.','Not Available','Not Tested',"not available","not tested","N.D.","-"]
417+
na_values = ['N.A.','N.A', 'N.C.','N.R.','Not Available','Not Tested',"not available",
418+
"not tested","N.D.","-",'Not tested','non testé']
418419
table = pd.read_html(tab,na_values=na_values)[0].dropna(how="all")
419420

420421
# Check for multiline headers

src/ddl/rvdss.sql

+87-25
Original file line numberDiff line numberDiff line change
@@ -5,45 +5,107 @@ TODO: briefly describe data source and define all columns.
55

66
CREATE TABLE `rvdss_repiratory_detections` (
77
`id` int(11) NOT NULL AUTO_INCREMENT,
8-
`date` date NOT NULL,
9-
`geo_type` char(20) NOT NULL,
10-
`geo_value` char(20) NOT NULL,
11-
`epiweek` int(11) NOT NULL,
12-
`flua_positive_tests` int(11) NOT NULL,
13-
`flua_percent_positive_tests` double NOT NULL,
14-
`flu_total_tests` int(11) NOT NULL,
8+
`epiweek` int(6) NOT NULL,
9+
`time_value` date NOT NULL,
10+
`issue` date NOT NULL,
11+
`geo_type` char(6) NOT NULL,
12+
`geo_value` char(35) NOT NULL,
13+
`sarscov2_tests` int(10) NOT NULL,
14+
`sarscov2_positive_tests` int(10) NOT NULL,
15+
`flu_tests` int(10) NOT NULL,
16+
`flu_positive_tests` int(10) NOT NULL,
17+
`fluah1n1pdm09_positive_tests` int(10) NOT NULL,
18+
`fluah3_positive_tests` int(10) NOT NULL,
19+
`fluauns_positive_tests` int(10) NOT NULL,
20+
`flua_positive_tests` int(10) NOT NULL,
21+
`flub_positive_tests` int(10) NOT NULL,
22+
`rsv_tests` int(10) NOT NULL,
23+
`rsv_positive_tests` int(10) NOT NULL,
24+
`hpiv_tests` int(10) NOT NULL,
25+
`hpiv1_positive_tests` int(10) NOT NULL,
26+
`hpiv2_positive_tests` int(10) NOT NULL,
27+
`hpiv3_positive_tests` int(10) NOT NULL,
28+
`hpiv4_positive_tests` int(10) NOT NULL,
29+
`hpivother_positive_tests` int(10) NOT NULL,
30+
`adv_tests` int(10) NOT NULL,
31+
`adv_positive_tests` int(10) NOT NULL,
32+
`hmpv_tests` int(10) NOT NULL,
33+
`hmpv_positive_tests` int(10) NOT NULL,
34+
`evrv_tests` int(10) NOT NULL,
35+
`evrv_positive_tests` int(10) NOT NULL,
36+
`hcov_tests` int(10) NOT NULL,
37+
`hcov_positive_tests` int(10) NOT NULL,
38+
`week` int(2) NOT NULL,
39+
`weekorder` int(2) NOT NULL,
40+
`year` int(4) NOT NULL,
1541
PRIMARY KEY (`id`),
16-
UNIQUE KEY `date` (`date`,`geo_value`),
42+
UNIQUE KEY `date` (`epiweek`, `time_value`,`issue`, `geo_type`,`geo_value`),
1743
KEY `state` (`state`),
1844
KEY `epiweek` (`epiweek`)
1945
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
2046

21-
CREATE TABLE `rvdss_testing` (
47+
CREATE TABLE `rvdss_pct_positive` (
2248
`id` int(11) NOT NULL AUTO_INCREMENT,
23-
`date` date NOT NULL,
24-
`geo_type` char(20) NOT NULL,
25-
`geo_value` char(20) NOT NULL,
26-
`epiweek` int(11) NOT NULL,
27-
`flua_positive_tests` int(11) NOT NULL,
28-
`flua_percent_positive_tests` double NOT NULL,
29-
`flu_total_tests` int(11) NOT NULL,
49+
`epiweek` int(6) NOT NULL,
50+
`time_value` date NOT NULL,
51+
`issue` date NOT NULL,
52+
`geo_type` char(6) NOT NULL,
53+
`geo_value` char(35) NOT NULL,
54+
`evrv_pct_positive` int(10) NOT NULL,
55+
`evrv_tests` int(10) NOT NULL,
56+
`evrv_positive_tests` int(10) NOT NULL,
57+
`hpiv_pct_positive` int(10) NOT NULL,
58+
`hpiv_tests` int(10) NOT NULL,
59+
`hpiv_positive_tests` int(10) NOT NULL,
60+
`adv_pct_positive` int(10) NOT NULL,
61+
`adv_tests` int(10) NOT NULL,
62+
`hcov_pct_positive` int(10) NOT NULL,
63+
`hcov_tests` int(10) NOT NULL,
64+
`hcov_positive_tests` int(10) NOT NULL,
65+
`flua_pct_positive` int(10) NOT NULL,
66+
`flub_pct_positive` int(10) NOT NULL,
67+
`flu_tests` int(10) NOT NULL,
68+
`flua_positive_tests` int(10) NOT NULL,
69+
`flua_tests` int(10) NOT NULL,
70+
`flub_tests` int(10) NOT NULL,
71+
`flub_positive_tests` int(10) NOT NULL,
72+
`flu_positive_tests` int(10) NOT NULL,
73+
`flu_pct_positive` int(10) NOT NULL,
74+
`hmpv_pct_positive` int(10) NOT NULL,
75+
`hmpv_tests` int(10) NOT NULL,
76+
`hmpv_positive_tests` int(10) NOT NULL,
77+
`rsv_pct_positive` int(10) NOT NULL,
78+
`rsv_tests` int(10) NOT NULL,
79+
`rsv_positive_tests` int(10) NOT NULL,
80+
`sarscov2_pct_positive` int(10) NOT NULL,
81+
`sarscov2_tests` int(10) NOT NULL,
82+
`sarscov2_positive_tests` int(10) NOT NULL,
83+
`region` char(20),
84+
`week` int(2) NOT NULL,
85+
`weekorder` int(2) NOT NULL,
86+
`year` int(4) NOT NULL,
3087
PRIMARY KEY (`id`),
31-
UNIQUE KEY `date` (`date`,`geo_value`),
88+
UNIQUE KEY `date` (`epiweek`, `time_value`,`issue`, `geo_type`,`geo_value`),
3289
KEY `state` (`state`),
3390
KEY `epiweek` (`epiweek`)
3491
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
3592

3693
CREATE TABLE `rvdss_detections_counts` (
3794
`id` int(11) NOT NULL AUTO_INCREMENT,
38-
`date` date NOT NULL,
39-
`geo_type` char(20) NOT NULL,
40-
`geo_value` char(20) NOT NULL,
41-
`epiweek` int(11) NOT NULL,
42-
`flua_positive_tests` int(11) NOT NULL,
43-
`flua_percent_positive_tests` double NOT NULL,
44-
`flu_total_tests` int(11) NOT NULL,
95+
`epiweek` int(6) NOT NULL,
96+
`time_value` date NOT NULL,
97+
`issue` date NOT NULL,
98+
`geo_type` char(6) NOT NULL,
99+
`geo_value` char(35) NOT NULL,
100+
`hpiv_positive_tests` int(10) NOT NULL,
101+
`adv_positive_tests` int(10) NOT NULL,
102+
`hmpv_positive_tests` int(10) NOT NULL,
103+
`evrv_positive_tests` int(10) NOT NULL,
104+
`hcov_positive_tests` int(10) NOT NULL,
105+
`rsv_positive_tests` int(10) NOT NULL,
106+
`flu_positive_tests` int(10) NOT NULL,
45107
PRIMARY KEY (`id`),
46-
UNIQUE KEY `date` (`date`,`geo_value`),
108+
UNIQUE KEY `date` (`epiweek`, `time_value`,`issue`, `geo_type`,`geo_value`),
47109
KEY `state` (`state`),
48110
KEY `epiweek` (`epiweek`)
49111
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

0 commit comments

Comments
 (0)