Skip to content

Commit b2e5013

Browse files
committed
Add tests and testdata
1 parent c0742fc commit b2e5013

File tree

8 files changed

+1297
-13
lines changed

8 files changed

+1297
-13
lines changed

requirements.dev.txt

+3
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,6 @@ selenium==4.7.2
1919
sqlalchemy-stubs>=0.3
2020
tenacity==7.0.0
2121
xlrd==2.0.1
22+
bs4
23+
mock
24+
requests_file

src/acquisition/rvdss/utils.py

+14-9
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,19 @@
1414
)
1515

1616
def abbreviate_virus(full_name):
    """Lowercase ``full_name`` and abbreviate any virus names found in it.

    Every whole-word occurrence of a key of ``VIRUSES`` in the lowercased
    text is replaced by the value stored under that key. Text matching no
    key is returned lowercased but otherwise unchanged.
    """
    lowered = full_name.lower()

    # Escape each key so it is matched literally, then build one alternation
    # anchored on word boundaries so only whole words are replaced.
    alternatives = '|'.join(re.escape(name) for name in VIRUSES.keys())
    matcher = re.compile(r'\b(' + alternatives + r')\b')

    def _abbreviation(match):
        # The matched text is exactly one of the VIRUSES keys.
        return VIRUSES[match.group()]

    return matcher.sub(_abbreviation, lowered)
2224

2325
def abbreviate_geo(full_name):
26+
"""Abbreviate provincial geo_values and make spelling consistent (i.e. removing extra spaces)"""
2427
lowercase=full_name.lower()
2528
lowercase = re.sub("province of ","",lowercase)
26-
lowercase=re.sub("\.|\*","",lowercase)
29+
lowercase=re.sub(r"\.|\*","",lowercase)
2730
lowercase=re.sub("/territoires","",lowercase)
2831
lowercase=re.sub("^cana$","can",lowercase)
2932
lowercase =lowercase.translate(str.maketrans(string.punctuation, ' '*len(string.punctuation),'.'+"'"))
@@ -43,7 +46,8 @@ def abbreviate_geo(full_name):
4346
return(result)
4447

4548
def create_geo_types(geo,default_geo):
46-
if geo in NATION:
49+
lowercase_geo = geo.lower()
50+
if lowercase_geo in NATION:
4751
geo_type="nation"
4852
elif geo in REGIONS:
4953
geo_type="region"
@@ -88,15 +92,15 @@ def preprocess_table_columns(table):
8892
Change some naming of locations in columns (i.e at instead of atl)
8993
"""
9094
table.columns = [re.sub("\xa0"," ", col) for col in table.columns] # \xa0 to space
91-
table.columns = [re.sub("(.*?)(\.\d+)", "\\1", c) for c in table.columns] # remove .# for duplicated columns
92-
table.columns =[re.sub("\.", "", s)for s in table.columns] #remove periods
95+
table.columns = [re.sub(r"(.*?)(\.\d+)", "\\1", c) for c in table.columns] # remove .# for duplicated columns
96+
table.columns =[re.sub(r"\.", "", s)for s in table.columns] #remove periods
9397
table.columns =[re.sub(r"\((all)\)", "", s)for s in table.columns] # remove (all)
94-
table.columns =[re.sub(r"\s*\(|\)", "", s)for s in table.columns]
98+
table.columns =[re.sub(r"\s*\(|\)", "", s)for s in table.columns] # remove ( )
9599
table.columns = [re.sub(' +', ' ', col) for col in table.columns] # Make any multiple spaces into one space
96-
table.columns = [re.sub(r'\(|\)', '', col) for col in table.columns] # replace () for _
100+
table.columns = [re.sub(r'\(|\)', '', col) for col in table.columns]
97101
table.columns = [re.sub(r'/', '_', col) for col in table.columns] # replace / with _
98102

99-
table.columns = [re.sub(r"^at\b","atl ",t) for t in table.columns]
103+
table.columns = [re.sub(r"^at\b","atl",t) for t in table.columns]
100104
table.columns = [re.sub("canada","can",t) for t in table.columns]
101105
table.columns = [re.sub(r"\bcb\b","bc",t) for t in table.columns]
102106

@@ -146,7 +150,8 @@ def make_signal_type_spelling_consistent(signal):
146150
pat4 = 'tested'
147151
combined_pat2 = '|'.join((pat3, pat4))
148152

149-
new_signal = re.sub(combined_pat, "positive_tests",signal)
153+
new_signal = re.sub("positive tests", "positive_tests",signal)
154+
new_signal = re.sub(combined_pat, "positive_tests",new_signal)
150155
new_signal = re.sub(combined_pat2, "tests",new_signal)
151156
new_signal =re.sub(" *%", "_pct_positive",new_signal)
152157
new_signal = re.sub("total ", "",new_signal)
@@ -198,7 +203,7 @@ def get_detections_data(base_url,headers,update_date):
198203
week_df = summary_df[(summary_df['Section'] == "summary") & (summary_df['Type']=="title")]
199204
week_string = week_df.iloc[0]['Text'].lower()
200205
current_week = int(re.search("week (.+?) ", week_string).group(1))
201-
current_year= int(re.search("20\d{2}", week_string).group(0))
206+
current_year= int(re.search(r"20\d{2}", week_string).group(0))
202207

203208
current_epiweek= Week(current_year,current_week)
204209

testdata/acquisition/rvdss/RVD_CurrentWeekTable.csv

+1,026
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
Language,Section,Type,Text
2+
English,summary,title,"Summary of laboratory data for Week 7 (week ending February 15, 2025)"
3+
English,summary,text,"In week 7 (week ending February 15, 2025) in Canada, percent positivity is currently highest for influenza (26.9% positive) among respiratory viruses under surveillance. The following results were reported from RVDSS laboratories:"
4+
English,category1,title,Influenza (includes influenza A and B)
5+
English,category1,listitem1,"Influenza percent positivity continues to increase (11,790 detections; 26.9% positive)."
6+
English,category2,title,SARS-CoV-2 (the virus which causes COVID-19)
7+
English,category2,listitem1,"National SARS-CoV-2 percent positivity continues to decrease (1,750 detections; 4.0% positive)."
8+
English,category3,title,RSV (respiratory syncytial virus)
9+
English,category3,listitem1,"National RSV percent positivity continues to decrease (1,938 detections; 4.9% positive)."
10+
English,category4,title,Other respiratory viruses
11+
English,category4,listitem1,Percent positivity of all other respiratory viruses is following historically observed trends.
12+
English,category5,title,Number of reporting laboratories
13+
English,category5,listitem1,34 out of 35 laboratories reported surveillance data.
14+
French,summary,title,Résumé des données de laboratoire pour la semaine 7 (semaine se terminant le 15 février 2025)
15+
French,summary,text,"Au cours de la semaine 7 (se terminant le 15 février 2025) au Canada, le pourcentage de positivité est actuellement le plus élevé pour la grippe (26,9 % positifs) parmi les virus respiratoires sous surveillance. Les résultats suivants ont été rapportés par les laboratoires du SSDVR :"
16+
French,category1,title,La grippe (incluant la grippe A et B)
17+
French,category1,listitem1,"Le pourcentage de positivité pour la grippe continue d’augmenter (11 790 détections; 26,9 % positifs)."
18+
French,category2,title,SRAS-CoV-2 (le virus à l’origine de la COVID-19)
19+
French,category2,listitem1,"Le pourcentage de positivité national pour le SRAS-CoV-2 continue de diminuer (1 750 détections; 4,0 % positifs)."
20+
French,category3,title,Le VRS (virus respiratoire syncytial)
21+
French,category3,listitem1,"Le pourcentage de positivité national du VRS continue de diminuer (1 938 détections; 4,9 % positifs)."
22+
French,category4,title,Autres virus respiratoires
23+
French,category4,listitem1,Le pourcentage de positivité de tous les autres virus respiratoires suit les tendances historiques observées.
24+
French,category5,title,Nombre de laboratoires déclarants
25+
French,category5,listitem1,Nombre de laboratoires qui ont fait état de la situation : 34 sur 35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
2/20/2025 10:28:16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
2025-02-14
2+
2023-09-01

tests/acquisition/rvdss/test_pull_historic.py

+59
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
"""Unit tests for rvdss/pull_historic.py."""
22

33
import pytest
4+
import mock
5+
6+
from delphi.epidata.acquisition.rvdss.pull_historic import (get_report_season_years, add_https_prefix,
7+
construct_weekly_report_urls, report_weeks, get_report_date, extract_captions_of_interest, get_modified_dates,
8+
deduplicate_rows, drop_ah1_columns, create_detections_table, create_number_detections_table,
9+
create_percent_positive_detection_table, fetch_one_season_from_report, fetch_archived_dashboard_dates,
10+
fetch_report_data, fetch_historical_dashboard_data)
411

512
# py3tester coverage target
613
__test_target__ = "delphi.epidata.acquisition.rvdss.pull_historic"
@@ -11,3 +18,55 @@ class TestPullHistoric():
1118
def test_syntax(self):
1219
"""This no-op test ensures that syntax is valid."""
1320
pass
21+
22+
def test_get_report_season_years(self):
23+
pass
24+
25+
def test_add_https_prefix(self):
26+
# assert add_https_prefix(["/random.html"]) == "https://www.canada.ca/random.html"
27+
# assert add_https_prefix(["http://randomurl2.html"]) == "https://randomurl2.html"
28+
# assert add_https_prefix(["https://randomurl3.html"]) == "https://randomurl3.html"
29+
pass
30+
31+
def test_construct_weekly_report_urls(self):
32+
pass
33+
34+
def test_report_weeks(self):
35+
pass
36+
37+
def test_get_report_date(self):
38+
pass
39+
40+
def test_extract_captions_of_interest(self):
41+
pass
42+
43+
def test_get_modified_dates(self):
44+
pass
45+
46+
def test_deduplicate_rows(self):
47+
pass
48+
49+
def test_drop_ah1_columns(self):
50+
pass
51+
52+
def test_create_detections_table(self):
53+
pass
54+
55+
def test_create_number_detections_table(self):
56+
pass
57+
58+
def test_create_percent_positive_detection_table(self):
59+
pass
60+
61+
def test_fetch_one_season_from_report(self):
62+
pass
63+
64+
def test_fetch_archived_dashboard_dates(self):
65+
pass
66+
67+
def test_fetch_report_data(self):
68+
pass
69+
70+
def test_fetch_historical_dashboard_data(self):
71+
pass
72+

tests/acquisition/rvdss/test_utils.py

+167-4
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,187 @@
11
"""Unit tests for rvdss/utils.py."""
22

33
import pytest
4+
import mock
5+
import requests
6+
from requests_file import FileAdapter
7+
from pathlib import Path
8+
import pandas as pd
49

5-
from delphi.epidata.acquisition.rvdss.utils import abbreviate_virus, create_geo_types
10+
from delphi.epidata.acquisition.rvdss.utils import (abbreviate_virus, abbreviate_geo, create_geo_types, check_date_format,
11+
get_dashboard_update_date, check_most_recent_update_date, preprocess_table_columns, add_flu_prefix,
12+
make_signal_type_spelling_consistent, get_positive_data, get_detections_data, fetch_dashboard_data)
613

714
# py3tester coverage target
815
__test_target__ = "delphi.epidata.acquisition.rvdss.utils"
916

17+
example_unprocessed_data = [
18+
pd.DataFrame({'Reporting\xa0Laboratories':1},index=[0]),
19+
pd.DataFrame({'lab':1,'lab.2':2},index=[0]),
20+
pd.DataFrame({'Reporting.lab':1},index=[0]),
21+
pd.DataFrame({'flucounts (all)':2},index=[0]),
22+
pd.DataFrame({'fluah1 (2009)':2},index=[0]),
23+
pd.DataFrame({'flucounts s':2},index=[0]),
24+
pd.DataFrame({'lab/tech':3},index=[0]),
25+
26+
pd.DataFrame({'at counts':1},index=[0]),
27+
pd.DataFrame({'canada counts':2},index=[0]),
28+
pd.DataFrame({'cb counts':3},index=[0]),
29+
30+
pd.DataFrame({'h1n1 2009 ':3},index=[0]),
31+
pd.DataFrame({'h1n12009 counts':3},index=[0]),
32+
pd.DataFrame({'a_h1 counts':3},index=[0]),
33+
pd.DataFrame({'ah1 counts':3},index=[0]),
34+
pd.DataFrame({'a_uns counts':3},index=[0]),
35+
pd.DataFrame({'a_h3 counts':3},index=[0]),
36+
37+
pd.DataFrame({'parainfluenza a':4,'piv b':4, "para c":4},index=[0]),
38+
pd.DataFrame({'adeno a':4, 'adeno b':4},index=[0]),
39+
pd.DataFrame({'human metapneumovirus a':4},index=[0]),
40+
pd.DataFrame({'enterovirus_rhinovirus a':4,'rhinovirus b':4, "rhv c":4,"entero_rhino d":4,"rhino e":4, "ev_rv f":4},index=[0]),
41+
pd.DataFrame({'coronavirus a':4,'coron b':4, "coro c":4},index=[0]),
42+
pd.DataFrame({'respiratory syncytial virus a':4},index=[0]),
43+
pd.DataFrame({'influenza counts':4},index=[0]),
44+
pd.DataFrame({'sars-cov-2 counts':4},index=[0]),
45+
46+
pd.DataFrame({"flu a":5,"flu b":5},index=[0]),
47+
pd.DataFrame({"flutest p":5},index=[0]),
48+
pd.DataFrame({"other hpiv a":5, "other_hpiv count b":5},index=[0]),
49+
50+
51+
pd.DataFrame({"flu apositive":6,"flu bpositive":6},index=[0]),
52+
pd.DataFrame({"hpiv_1 counts":6,"hpiv_2 counts":6,"hpiv_3 counts":6,"hpiv_4 counts":6},index=[0]),
53+
54+
pd.DataFrame({"num positive tests":7},index=[0]),
55+
pd.DataFrame({"num positive a":7,"num pos b":7},index=[0]),
56+
pd.DataFrame({"num test a":7,"num tested b":7},index=[0]),
57+
pd.DataFrame({"virus% a":7,"virus % b":7},index=[0]),
58+
pd.DataFrame({"total counts":7},index=[0])
59+
]
60+
61+
expected_processed_data = [
62+
pd.DataFrame({'reporting laboratories':1},index=[0]),
63+
pd.DataFrame({'lab':1,'lab2':2},index=[0]).rename(columns={"lab":"lab","lab2":"lab"}),
64+
pd.DataFrame({'reportinglab':1},index=[0]),
65+
pd.DataFrame({'flucounts ':2},index=[0]),
66+
pd.DataFrame({'fluah12009':2},index=[0]),
67+
pd.DataFrame({'flucounts s':2},index=[0]),
68+
pd.DataFrame({'lab_tech':3},index=[0]),
69+
70+
pd.DataFrame({'atl counts':1},index=[0]),
71+
pd.DataFrame({'can counts':2},index=[0]),
72+
pd.DataFrame({'bc counts':3},index=[0]),
73+
74+
pd.DataFrame({'ah1n1pdm09':3},index=[0]),
75+
pd.DataFrame({'ah1n1pdm09 counts':3},index=[0]),
76+
pd.DataFrame({'ah1n1pdm09 counts':3},index=[0]),
77+
pd.DataFrame({'ah1n1pdm09 counts':3},index=[0]),
78+
pd.DataFrame({'auns counts':3},index=[0]),
79+
pd.DataFrame({'ah3 counts':3},index=[0]),
80+
81+
pd.DataFrame({'hpiv a':4,'hpiv b':4, "hpiv c":4},index=[0]),
82+
pd.DataFrame({'adv a':4, 'adv b':4},index=[0]),
83+
pd.DataFrame({'hmpv a':4},index=[0]),
84+
pd.DataFrame({'evrv a':4,'evrv b':4, "evrv c":4,"evrv d":4,"evrv e":4, "evrv f":4},index=[0]),
85+
pd.DataFrame({'hcov a':4,'hcov b':4, "hcov c":4},index=[0]),
86+
pd.DataFrame({'rsv a':4},index=[0]),
87+
pd.DataFrame({'flu counts':4},index=[0]),
88+
pd.DataFrame({'sarscov2 counts':4},index=[0]),
89+
90+
pd.DataFrame({"flua":5,"flub":5},index=[0]),
91+
pd.DataFrame({"flu tests p":5},index=[0]),
92+
pd.DataFrame({"hpivother a":5, "hpivother count b":5},index=[0]),
93+
94+
pd.DataFrame({"flua_positive_tests":6,"flub_positive_tests":6},index=[0]),
95+
pd.DataFrame({"hpiv1 counts":6,"hpiv2 counts":6,"hpiv3 counts":6,"hpiv4 counts":6},index=[0]),
96+
97+
pd.DataFrame({"num positive_tests":7},index=[0]),
98+
pd.DataFrame({"num positive_tests a":7,"num positive_tests b":7},index=[0]),
99+
pd.DataFrame({"num tests a":7,"num tests b":7},index=[0]),
100+
pd.DataFrame({"virus_pct_positive a":7,"virus_pct_positive b":7},index=[0]),
101+
pd.DataFrame({"counts":7},index=[0])
102+
]
10103

11104
class TestUtils:
    """Unit tests for the helper functions imported from rvdss/utils.py."""

    def test_syntax(self):
        """This no-op test ensures that syntax is valid."""
        pass

    def test_abbreviate_virus(self):
        assert abbreviate_virus("influenza") == "flu"  # normal case
        assert abbreviate_virus("flu") == "flu"  # already abbreviated
        assert abbreviate_virus("parainfluenza") == "hpiv"
        assert abbreviate_virus("banana") == "banana"  # non-virus strings should remain as is

    def test_abbreviate_geo(self):
        assert abbreviate_geo("british columbia") == "bc"
        assert abbreviate_geo("québec") == "qc"  # recognise accents in provinces
        assert abbreviate_geo("Région Nord-Est") == "région nord est"  # remove dashes, make lowercase
        assert abbreviate_geo("P.H.O.L. - Sault Ste. Marie") == "phol sault ste marie"
        assert abbreviate_geo("random lab") == "random lab"  # unknown geos remain unchanged
        # only province names on their own should be abbreviated, not as part of a larger name
        assert abbreviate_geo("british columbia lab") == "british columbia lab"

    def test_create_geo_types(self):
        assert create_geo_types("canada", "lab") == "nation"
        assert create_geo_types("bc", "lab") == "region"
        assert create_geo_types("random lab", "lab") == "lab"
        assert create_geo_types("Canada", "province") == "nation"

    def test_check_date_format(self):
        assert check_date_format("2015-09-05") == "2015-09-05"
        assert check_date_format("01/10/2020") == "2020-10-01"  # change d/m/Y to Y-m-d
        assert check_date_format("02-11-2013") == "2013-11-02"  # change d-m-Y to Y-m-d
        with pytest.raises(AssertionError):
            check_date_format("02-2005-10")  # Invalid date format raises error

    @mock.patch("requests.get")
    def test_get_dashboard_update_date(self, mock_requests):
        # Set up fake data.
        headers = {}
        url = "testurl.ca"
        fixture = Path(__file__).parent / "RVD_UpdateDate.csv"

        # Load the fixture through a file:// adapter so the patched
        # requests.get hands back a response built from the local file.
        # The context manager closes the session even if the assertion fails.
        with requests.Session() as s:
            s.mount('file://', FileAdapter())
            resp = s.get('file://' + str(fixture))

            # Mocks
            mock_requests.return_value = resp
            assert get_dashboard_update_date(url, headers) == "2025-02-20"

    def test_check_most_recent_update_date(self):
        path = str(Path(__file__).parent / "example_update_dates.txt")

        assert check_most_recent_update_date("2025-02-14", path)  # date is in the file
        assert not check_most_recent_update_date("2025-03-20", path)  # date is not in the file

    def test_preprocess_table_columns(self):
        # zip() stops at the shorter list, so a missing expectation would
        # otherwise go unnoticed.
        assert len(example_unprocessed_data) == len(expected_processed_data)

        cases = zip(example_unprocessed_data, expected_processed_data)
        for case_number, (example, expected) in enumerate(cases):
            # Work on a copy: the function is handed a table whose columns it
            # rewrites, and the inputs are module-level fixtures that must not
            # be altered by running this test.
            result = preprocess_table_columns(example.copy())
            assert result.equals(expected), (
                f"case {case_number}: got columns {list(result.columns)}, "
                f"expected {list(expected.columns)}"
            )

    def test_add_flu_prefix(self):
        assert add_flu_prefix("ah3_pos") == "fluah3_pos"
        assert add_flu_prefix("auns") == "fluauns"
        assert add_flu_prefix("ah1pdm09 tests") == "fluah1pdm09 tests"
        assert add_flu_prefix("ah1n1pdm09") == "fluah1n1pdm09"
        assert add_flu_prefix("fluah1n1pdm09") == "fluah1n1pdm09"  # if prefix exists, do nothing
        assert add_flu_prefix("random string") == "random string"  # if no prefix, it should do nothing

    def test_make_signal_type_spelling_consistent(self):
        assert make_signal_type_spelling_consistent("positive tests") == "positive_tests"
        assert make_signal_type_spelling_consistent("flu pos") == "flu positive_tests"
        assert make_signal_type_spelling_consistent("rsv tested") == "rsv tests"
        assert make_signal_type_spelling_consistent("covid total tested") == "covid tests"
        assert make_signal_type_spelling_consistent("flua%") == "flua_pct_positive"

    def test_get_positive_data(self):
        pass

    def test_get_detections_data(self):
        pass

    def test_fetch_dashboard_data(self):
        pass

0 commit comments

Comments
 (0)