1
1
"""Unit tests for rvdss/utils.py."""
2
2
3
3
import pytest
4
+ import mock
5
+ import requests
6
+ from requests_file import FileAdapter
7
+ from pathlib import Path
8
+ import pandas as pd
4
9
5
- from delphi .epidata .acquisition .rvdss .utils import abbreviate_virus , create_geo_types
10
+ from delphi .epidata .acquisition .rvdss .utils import (abbreviate_virus , abbreviate_geo , create_geo_types , check_date_format ,
11
+ get_dashboard_update_date , check_most_recent_update_date , preprocess_table_columns , add_flu_prefix ,
12
+ make_signal_type_spelling_consistent , get_positive_data , get_detections_data , fetch_dashboard_data )
6
13
7
14
# py3tester coverage target
__test_target__ = "delphi.epidata.acquisition.rvdss.utils"

# Single-row frames whose column names are the raw inputs fed to
# `preprocess_table_columns`. Entry i pairs with entry i of
# `expected_processed_data`, so the two lists must stay the same length and
# in the same order.
example_unprocessed_data = [
    # Punctuation / whitespace variants (non-breaking space, dots,
    # parentheses, slash).
    pd.DataFrame({"Reporting\xa0 Laboratories": 1}, index=[0]),
    pd.DataFrame({"lab": 1, "lab.2": 2}, index=[0]),
    pd.DataFrame({"Reporting.lab": 1}, index=[0]),
    pd.DataFrame({"flucounts (all)": 2}, index=[0]),
    pd.DataFrame({"fluah1 (2009)": 2}, index=[0]),
    pd.DataFrame({"flucounts s": 2}, index=[0]),
    pd.DataFrame({"lab/tech": 3}, index=[0]),

    # Geography spellings.
    pd.DataFrame({"at counts": 1}, index=[0]),
    pd.DataFrame({"canada counts": 2}, index=[0]),
    pd.DataFrame({"cb counts": 3}, index=[0]),

    # Influenza A subtype spellings.
    pd.DataFrame({"h1n1 2009 ": 3}, index=[0]),
    pd.DataFrame({"h1n12009 counts": 3}, index=[0]),
    pd.DataFrame({"a_h1 counts": 3}, index=[0]),
    pd.DataFrame({"ah1 counts": 3}, index=[0]),
    pd.DataFrame({"a_uns counts": 3}, index=[0]),
    pd.DataFrame({"a_h3 counts": 3}, index=[0]),

    # Long-form and partial virus names.
    pd.DataFrame({"parainfluenza a": 4, "piv b": 4, "para c": 4}, index=[0]),
    pd.DataFrame({"adeno a": 4, "adeno b": 4}, index=[0]),
    pd.DataFrame({"human metapneumovirus a": 4}, index=[0]),
    pd.DataFrame(
        {
            "enterovirus_rhinovirus a": 4,
            "rhinovirus b": 4,
            "rhv c": 4,
            "entero_rhino d": 4,
            "rhino e": 4,
            "ev_rv f": 4,
        },
        index=[0],
    ),
    pd.DataFrame({"coronavirus a": 4, "coron b": 4, "coro c": 4}, index=[0]),
    pd.DataFrame({"respiratory syncytial virus a": 4}, index=[0]),
    pd.DataFrame({"influenza counts": 4}, index=[0]),
    pd.DataFrame({"sars-cov-2 counts": 4}, index=[0]),

    # Spacing inside flu / hpiv names.
    pd.DataFrame({"flu a": 5, "flu b": 5}, index=[0]),
    pd.DataFrame({"flutest p": 5}, index=[0]),
    pd.DataFrame({"other hpiv a": 5, "other_hpiv count b": 5}, index=[0]),

    # Fused "positive" suffix and underscore-numbered hpiv types.
    pd.DataFrame({"flu apositive": 6, "flu bpositive": 6}, index=[0]),
    pd.DataFrame(
        {
            "hpiv_1 counts": 6,
            "hpiv_2 counts": 6,
            "hpiv_3 counts": 6,
            "hpiv_4 counts": 6,
        },
        index=[0],
    ),

    # Signal-type spellings (positive / tests / percent / total).
    pd.DataFrame({"num positive tests": 7}, index=[0]),
    pd.DataFrame({"num positive a": 7, "num pos b": 7}, index=[0]),
    pd.DataFrame({"num test a": 7, "num tested b": 7}, index=[0]),
    pd.DataFrame({"virus% a": 7, "virus % b": 7}, index=[0]),
    pd.DataFrame({"total counts": 7}, index=[0]),
]
60
+
61
# Expected output of `preprocess_table_columns` for each frame in
# `example_unprocessed_data`, in the same order (entry i here is the expected
# result for entry i there).
expected_processed_data = [
    # Punctuation / whitespace variants.
    pd.DataFrame({"reporting laboratories": 1}, index=[0]),
    # Built via rename because a dict literal cannot hold the duplicate
    # column name "lab" twice.
    pd.DataFrame({"lab": 1, "lab2": 2}, index=[0]).rename(
        columns={"lab": "lab", "lab2": "lab"}
    ),
    pd.DataFrame({"reportinglab": 1}, index=[0]),
    pd.DataFrame({"flucounts ": 2}, index=[0]),
    pd.DataFrame({"fluah12009": 2}, index=[0]),
    pd.DataFrame({"flucounts s": 2}, index=[0]),
    pd.DataFrame({"lab_tech": 3}, index=[0]),

    # Geography spellings.
    pd.DataFrame({"atl counts": 1}, index=[0]),
    pd.DataFrame({"can counts": 2}, index=[0]),
    pd.DataFrame({"bc counts": 3}, index=[0]),

    # Influenza A subtype spellings.
    pd.DataFrame({"ah1n1pdm09": 3}, index=[0]),
    pd.DataFrame({"ah1n1pdm09 counts": 3}, index=[0]),
    pd.DataFrame({"ah1n1pdm09 counts": 3}, index=[0]),
    pd.DataFrame({"ah1n1pdm09 counts": 3}, index=[0]),
    pd.DataFrame({"auns counts": 3}, index=[0]),
    pd.DataFrame({"ah3 counts": 3}, index=[0]),

    # Virus abbreviations.
    pd.DataFrame({"hpiv a": 4, "hpiv b": 4, "hpiv c": 4}, index=[0]),
    pd.DataFrame({"adv a": 4, "adv b": 4}, index=[0]),
    pd.DataFrame({"hmpv a": 4}, index=[0]),
    pd.DataFrame(
        {
            "evrv a": 4,
            "evrv b": 4,
            "evrv c": 4,
            "evrv d": 4,
            "evrv e": 4,
            "evrv f": 4,
        },
        index=[0],
    ),
    pd.DataFrame({"hcov a": 4, "hcov b": 4, "hcov c": 4}, index=[0]),
    pd.DataFrame({"rsv a": 4}, index=[0]),
    pd.DataFrame({"flu counts": 4}, index=[0]),
    pd.DataFrame({"sarscov2 counts": 4}, index=[0]),

    # Spacing inside flu / hpiv names.
    pd.DataFrame({"flua": 5, "flub": 5}, index=[0]),
    pd.DataFrame({"flu tests p": 5}, index=[0]),
    pd.DataFrame({"hpivother a": 5, "hpivother count b": 5}, index=[0]),

    # "positive" suffix and numbered hpiv types.
    pd.DataFrame({"flua_positive_tests": 6, "flub_positive_tests": 6}, index=[0]),
    pd.DataFrame(
        {
            "hpiv1 counts": 6,
            "hpiv2 counts": 6,
            "hpiv3 counts": 6,
            "hpiv4 counts": 6,
        },
        index=[0],
    ),

    # Signal-type spellings.
    pd.DataFrame({"num positive_tests": 7}, index=[0]),
    pd.DataFrame({"num positive_tests a": 7, "num positive_tests b": 7}, index=[0]),
    pd.DataFrame({"num tests a": 7, "num tests b": 7}, index=[0]),
    pd.DataFrame({"virus_pct_positive a": 7, "virus_pct_positive b": 7}, index=[0]),
    pd.DataFrame({"counts": 7}, index=[0]),
]
10
103
11
104
class TestUtils:
    """Unit tests for the helpers imported from rvdss/utils.py."""

    def test_syntax(self):
        """This no-op test ensures that syntax is valid."""
        pass

    def test_abbreviate_virus(self):
        """Virus names map to their abbreviations; other strings pass through."""
        assert abbreviate_virus("influenza") == "flu"  # normal case
        assert abbreviate_virus("flu") == "flu"  # already abbreviated
        assert abbreviate_virus("parainfluenza") == "hpiv"
        assert abbreviate_virus("banana") == "banana"  # non-viruses should remain as is

    def test_abbreviate_geo(self):
        """Province names are abbreviated; other geo strings are only cleaned."""
        assert abbreviate_geo("british columbia") == "bc"
        assert abbreviate_geo("québec") == "qc"  # recognise accents in provinces
        assert abbreviate_geo("Région Nord-Est") == "région nord est"  # remove dashes, make lowercase
        assert abbreviate_geo("P.H.O.L. - Sault Ste. Marie") == "phol sault ste marie"
        assert abbreviate_geo("random lab") == "random lab"  # unknown geos remain unchanged
        # only province names on their own should be abbreviated, not as part of a larger name
        assert abbreviate_geo("british columbia lab") == "british columbia lab"

    def test_create_geo_types(self):
        """Geo values are classified as nation, region, or the default type."""
        assert create_geo_types("canada", "lab") == "nation"
        assert create_geo_types("bc", "lab") == "region"
        assert create_geo_types("random lab", "lab") == "lab"
        assert create_geo_types("Canada", "province") == "nation"

    def test_check_date_format(self):
        """Dates are normalised to Y-m-d; an unparseable layout raises."""
        assert check_date_format("2015-09-05") == "2015-09-05"
        assert check_date_format("01/10/2020") == "2020-10-01"  # change d/m/Y to Y-m-d
        assert check_date_format("02-11-2013") == "2013-11-02"  # change d-m-Y to Y-m-d
        with pytest.raises(AssertionError):
            check_date_format("02-2005-10")  # Invalid date format raises error

    @mock.patch("requests.get")
    def test_get_dashboard_update_date(self, mock_requests):
        """The update date is read from a stubbed ``requests.get`` response."""
        # Set up fake data.
        headers = {}
        url = "testurl.ca"

        # Build the file:// URL with pathlib rather than string concatenation
        # so separators and special characters are handled consistently.
        fixture_url = (Path(__file__).resolve().parent / "RVD_UpdateDate.csv").as_uri()

        # Load the fixture through a file:// adapter so the mock returns a
        # genuine requests response. The context manager closes the session,
        # and the assertion stays inside it so the response is used while the
        # session is still open.
        with requests.Session() as s:
            s.mount("file://", FileAdapter())
            resp = s.get(fixture_url)

            # Mocks
            mock_requests.return_value = resp
            assert get_dashboard_update_date(url, headers) == "2025-02-20"

    def test_check_most_recent_update_date(self):
        """A date is reported as present only when the fixture file lists it."""
        path = str(Path(__file__).parent / "example_update_dates.txt")

        assert check_most_recent_update_date("2025-02-14", path)  # date is in the file
        assert not check_most_recent_update_date("2025-03-20", path)  # date is not in the file

    def test_preprocess_table_columns(self):
        """Each raw fixture frame is transformed into its expected counterpart."""
        # zip() stops at the shorter list, so guard against the two fixture
        # lists drifting apart and silently skipping cases.
        assert len(example_unprocessed_data) == len(expected_processed_data)

        for example, expected in zip(example_unprocessed_data, expected_processed_data):
            # Captured before the call in case the function renames columns
            # in place, so the failure message still shows the raw input.
            input_columns = list(example.columns)
            assert preprocess_table_columns(example).equals(expected), (
                f"unexpected result for input columns {input_columns}"
            )

    def test_add_flu_prefix(self):
        """Flu subtype names gain a ``flu`` prefix; other strings are untouched."""
        assert add_flu_prefix("ah3_pos") == "fluah3_pos"
        assert add_flu_prefix("auns") == "fluauns"
        assert add_flu_prefix("ah1pdm09 tests") == "fluah1pdm09 tests"
        assert add_flu_prefix("ah1n1pdm09") == "fluah1n1pdm09"
        assert add_flu_prefix("fluah1n1pdm09") == "fluah1n1pdm09"  # if prefix exists, do nothing
        assert add_flu_prefix("random string") == "random string"  # if no prefix, it should do nothing

    def test_make_signal_type_spelling_consistent(self):
        """Signal-type spellings are rewritten to the expected canonical forms."""
        assert make_signal_type_spelling_consistent("positive tests") == "positive_tests"
        assert make_signal_type_spelling_consistent("flu pos") == "flu positive_tests"
        assert make_signal_type_spelling_consistent("rsv tested") == "rsv tests"
        assert make_signal_type_spelling_consistent("covid total tested") == "covid tests"
        assert make_signal_type_spelling_consistent("flua%") == "flua_pct_positive"

    def test_get_positive_data(self):
        """Placeholder: no assertions yet."""
        pass

    def test_get_detections_data(self):
        """Placeholder: no assertions yet."""
        pass

    def test_fetch_dashboard_data(self):
        """Placeholder: no assertions yet."""
        pass
0 commit comments