Commit 6353b18

Add utils tests and move test data
1 parent b2e5013 commit 6353b18

File tree

6 files changed: +4009 -27 lines changed


src/acquisition/rvdss/utils.py

+54 -16
@@ -112,7 +112,7 @@ def preprocess_table_columns(table):
     table.columns = [re.sub(r"flu a","flua",t) for t in table.columns]
     table.columns = [re.sub(r"flu b","flub",t) for t in table.columns]
     table.columns = [re.sub(r"flutest\b","flu test", col) for col in table.columns]
-    table.columns = [re.sub(r"other hpiv|other_hpiv","hpivother",t) for t in table.columns]
+    table.columns = [re.sub(r"other hpiv|other_hpiv|hpiv_other","hpivother",t) for t in table.columns]
 
     table.columns=[re.sub(r'bpositive','b_positive',c) for c in table.columns]
     table.columns=[re.sub(r'apositive','a_positive',c) for c in table.columns]
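
A quick sketch of what the widened alternation changes (the column names below are made up for illustration, not taken from the RVDSS tables): the old pattern left a name already written as hpiv_other untouched, while the new one folds all three spellings into hpivother.

import re

# Hypothetical column names, for illustration only.
cols = ["other hpiv tests", "other_hpiv positive_tests", "hpiv_other tests"]

old = [re.sub(r"other hpiv|other_hpiv", "hpivother", c) for c in cols]
new = [re.sub(r"other hpiv|other_hpiv|hpiv_other", "hpivother", c) for c in cols]

print(old)  # ['hpivother tests', 'hpivother positive_tests', 'hpiv_other tests']
print(new)  # ['hpivother tests', 'hpivother positive_tests', 'hpivother tests']
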
@@ -173,7 +173,8 @@ def get_positive_data(base_url,headers,update_date):
     df['time_value'] = [check_date_format(d) for d in df['time_value']]
     df['geo_type'] = [create_geo_types(g,"province") for g in df['geo_value']]
     df.insert(1,"issue",update_date)
-
+    df['region'] = [abbreviate_geo(g) for g in df['region']]
+
     #df=df.drop(["weekorder","region","year","week"],axis=1)
 
     df = df.pivot(index=['epiweek','time_value','issue','geo_type','geo_value','region','week','weekorder','year'],
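
For context, a minimal pandas sketch of the reshaping this hunk feeds into; the toy frame, the abbreviate_geo stand-in, and the columns/values arguments of the pivot are illustrative assumptions, not the module's real data or full call.

import pandas as pd

# Stand-in for the module's abbreviate_geo helper; the real mapping lives in utils.py.
def abbreviate_geo(g):
    return {"british columbia": "bc", "ontario": "on"}.get(g.lower(), g)

# Toy long-format frame; the real pivot index also carries issue, geo_type, week, weekorder, year.
df = pd.DataFrame({
    "epiweek": [202445, 202445],
    "time_value": ["2024-11-09", "2024-11-09"],
    "geo_value": ["bc", "on"],
    "region": ["British Columbia", "Ontario"],
    "virus": ["flua_positive_tests", "flua_positive_tests"],
    "value": [12, 30],
})

# Abbreviating region *before* the pivot keeps the index values consistent
# with geo_value, instead of patching them up after reshaping.
df["region"] = [abbreviate_geo(g) for g in df["region"]]

wide = df.pivot(index=["epiweek", "time_value", "geo_value", "region"],
                columns="virus", values="value")
print(wide)
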
@@ -194,38 +195,75 @@ def get_positive_data(base_url,headers,update_date):
 
     return(df)
 
-def get_detections_data(base_url,headers,update_date):
-    # Get current week and year
-    summary_url = base_url + "RVD_SummaryText.csv"
-    summary_url_response = requests.get(summary_url, headers=headers)
-    summary_df = pd.read_csv(io.StringIO(summary_url_response.text))
+# def get_detections_data(base_url,headers,update_date):
+#     # Get current week and year
+#     summary_url = base_url + "RVD_SummaryText.csv"
+#     summary_url_response = requests.get(summary_url, headers=headers)
+#     summary_df = pd.read_csv(io.StringIO(summary_url_response.text))
+
+#     week_df = summary_df[(summary_df['Section'] == "summary") & (summary_df['Type']=="title")]
+#     week_string = week_df.iloc[0]['Text'].lower()
+#     current_week = int(re.search("week (.+?) ", week_string).group(1))
+#     current_year= int(re.search(r"20\d{2}", week_string).group(0))
+
+#     current_epiweek= Week(current_year,current_week)
+
+#     # Get weekly data
+#     detections_url = base_url + "RVD_CurrentWeekTable.csv"
+#     detections_url_response = requests.get(detections_url, headers=headers)
+#     detections_url_response.encoding='UTF-8'
+#     df_detections = pd.read_csv(io.StringIO(detections_url_response.text))
+
+#     # swap order of names from a_b to b_a
+#     df_detections = df_detections.rename(columns=lambda x: '_'.join(x.split('_')[1:]+x.split('_')[:1]))
+#     df_detections.insert(0,"epiweek",int(str(current_epiweek)))
+#     df_detections.insert(1,"time_value",str(current_epiweek.enddate()))
+#     df_detections.insert(2,"issue",update_date)
+#     df_detections=preprocess_table_columns(df_detections)
 
-    week_df = summary_df[(summary_df['Section'] == "summary") & (summary_df['Type']=="title")]
-    week_string = week_df.iloc[0]['Text'].lower()
-    current_week = int(re.search("week (.+?) ", week_string).group(1))
-    current_year= int(re.search(r"20\d{2}", week_string).group(0))
+#     df_detections.columns=[re.sub(r' ','_',c) for c in df_detections.columns]
+#     df_detections=df_detections.rename(columns={'reportinglaboratory':"geo_value"})
+#     df_detections['geo_value'] = [abbreviate_geo(g) for g in df_detections['geo_value']]
+#     df_detections['geo_type'] = [create_geo_types(g,"lab") for g in df_detections['geo_value']]
 
-    current_epiweek= Week(current_year,current_week)
+#     return(df_detections.set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value']))
+
+
+def get_detections_data(base_url,headers,update_date):
+    # Get current week and year
+    # summary_url = base_url + "RVD_SummaryText.csv"
+    # summary_url_response = requests.get(summary_url, headers=headers)
+    # summary_df = pd.read_csv(io.StringIO(summary_url_response.text))
+    # week_df = summary_df[(summary_df['Section'] == "summary") & (summary_df['Type']=="title")]
+    # week_string = week_df.iloc[0]['Text'].lower()
+    # current_week = int(re.search("week (.+?) ", week_string).group(1))
+    # current_year= int(re.search(r"20\d{2}", week_string).group(0))
+    # current_epiweek= Week(current_year,current_week)
 
     # Get weekly data
     detections_url = base_url + "RVD_CurrentWeekTable.csv"
     detections_url_response = requests.get(detections_url, headers=headers)
     detections_url_response.encoding='UTF-8'
     df_detections = pd.read_csv(io.StringIO(detections_url_response.text))
+
+    df_detections["year"] = [int(re.search(r"20\d{2}", w).group(0)) for w in df_detections["date"]]
+    ew = df_detections.apply(lambda x: Week(x['year'],x['week']),axis=1)
 
+    # swap order of names from a_b to b_a
     df_detections = df_detections.rename(columns=lambda x: '_'.join(x.split('_')[1:]+x.split('_')[:1]))
-    df_detections.insert(0,"epiweek",int(str(current_epiweek)))
-    df_detections.insert(1,"time_value",str(current_epiweek.enddate()))
+    df_detections.insert(0,"epiweek",[int(str(w)) for w in ew])
+    df_detections['epiweek'] = [int(str(w)) for w in df_detections['epiweek']]
     df_detections.insert(2,"issue",update_date)
+
     df_detections=preprocess_table_columns(df_detections)
-
     df_detections.columns=[re.sub(r' ','_',c) for c in df_detections.columns]
-    df_detections=df_detections.rename(columns={'reportinglaboratory':"geo_value"})
+    df_detections=df_detections.rename(columns={'reportinglaboratory':"geo_value",'date':"time_value"})
     df_detections['geo_value'] = [abbreviate_geo(g) for g in df_detections['geo_value']]
     df_detections['geo_type'] = [create_geo_types(g,"lab") for g in df_detections['geo_value']]
 
     return(df_detections.set_index(['epiweek', 'time_value', 'issue', 'geo_type', 'geo_value']))
 
+
 def fetch_dashboard_data(url):
     headers = {
         'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
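
A small sketch of the new epiweek derivation, assuming Week is epiweeks.Week (the int(str(w)) and enddate() usage points that way); the toy frame below stands in for RVD_CurrentWeekTable.csv, and its column names are illustrative. Instead of one current_epiweek read from the summary CSV, each row's year is pulled from its date string, paired with the week column, and encoded as a YYYYWW integer; the date column itself then becomes time_value.

import re
import pandas as pd
from epiweeks import Week

# Toy rows standing in for the weekly detections table; real column names may differ.
df = pd.DataFrame({
    "date": ["2024-11-09", "2024-11-09"],
    "week": [45, 45],
    "flua_positive_tests": [12, 30],
})

# Year comes out of the date string, then is paired with the week column per row.
df["year"] = [int(re.search(r"20\d{2}", d).group(0)) for d in df["date"]]
ew = df.apply(lambda x: Week(x["year"], x["week"]), axis=1)

print([int(str(w)) for w in ew])   # [202445, 202445] -- str(Week) is the YYYYWW form
print(ew.iloc[0].enddate())        # the Saturday ending that epiweek

# The rename lambda rotates the first underscore-separated token to the end,
# e.g. flua_positive_tests -> positive_tests_flua.
swap = lambda x: "_".join(x.split("_")[1:] + x.split("_")[:1])
print(swap("flua_positive_tests"))

Deriving the epiweek per row keeps the labeling correct even if the table ever carries rows from more than one week.
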
