Skip to content

Commit 9d22911

Browse files
committed
Add extra values that should be read as NA and counts with spaces in them
1 parent 22c8589 commit 9d22911

File tree

2 files changed, with 7 additions (+7) and 4 deletions (-4).

src/acquisition/rvdss/pull_historic.py

+1
Original file line number | Diff line number | Diff line change
@@ -237,6 +237,7 @@ def create_detections_table(table,modified_date,week_number,week_end_date,start_
237237

238238
def create_number_detections_table(table,modified_date,start_year):
239239
week_columns = table.columns.get_indexer(table.columns[~table.columns.str.contains('week')])
240+
table = table.apply(lambda x: x.replace(r'\s', '', regex=True).astype('int'))
240241

241242
for index in week_columns:
242243
new_name = abbreviate_virus(table.columns[index]) + " positive_tests"

tests/acquisition/rvdss/test_pull_historic.py

+6 -4
Original file line number | Diff line number | Diff line change
@@ -226,7 +226,8 @@ def test_create_detections_table(self):
226226
tab.tfoot.decompose()
227227
tab = re.sub(",",r".",str(tab))
228228

229-
na_values = ['N.A.','N.A', 'N.C.','N.R.','Not Available','Not Tested',"not available","not tested","N.D.","-"]
229+
na_values = ['N.A.','N.A', 'N.C.','N.R.','Not Available','Not Tested',"not available",
230+
"not tested","N.D.","-",'Not tested','non testé']
230231
table = pd.read_html(tab,na_values=na_values)[0].dropna(how="all")
231232
table.columns=table.columns.str.lower()
232233
table = drop_ah1_columns(table)
@@ -260,7 +261,8 @@ def test_create_number_detections_table(self):
260261
tab = caption.find_next('table')
261262
tab = re.sub(",","",str(tab))
262263

263-
na_values = ['N.A.','N.A', 'N.C.','N.R.','Not Available','Not Tested',"not available","not tested","N.D.","-"]
264+
na_values = ['N.A.','N.A', 'N.C.','N.R.','Not Available','Not Tested',"not available",
265+
"not tested","N.D.","-",'Not tested','non testé']
264266
table = pd.read_html(tab,na_values=na_values)[0].dropna(how="all")
265267
table.columns=table.columns.str.lower()
266268
table = drop_ah1_columns(table)
@@ -295,8 +297,8 @@ def test_create_percent_positive_detection_table(self):
295297
expected_rsvdata = expected_rsvdata.sort_values(by=['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value'])
296298

297299
# get tables from raw html and process before testing the function
298-
na_values = ['N.A.','N.A', 'N.C.','N.R.','Not Available','Not Tested',"not available","not tested","N.D.","-"]
299-
300+
na_values = ['N.A.','N.A', 'N.C.','N.R.','Not Available','Not Tested',"not available",
301+
"not tested","N.D.","-",'Not tested','non testé']
300302
flu_caption=[t for t in captions if "Influenza" in t.text][0]
301303
flu_tab = flu_caption.find_next('table')
302304
flu_tab = re.sub(",","",str(flu_tab))

0 commit comments

Comments
 (0)