-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfetcher.py
60 lines (44 loc) · 1.43 KB
/
fetcher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Rewrite in Twisted
import errno
import io
import os.path
import time
import requests
# Cache file expires after this many seconds; get_page compares this against
# the cache file's modification time.
EXPIRY = 3600
# String prepended to the cache file name to build the on-disk cache path.
FILE_PREFIX = '/tmp/'
# HTTP headers sent with every request made by fetch_page (browser-like
# User-Agent string).
HEADERS = {'User-Agent': 'Mozilla/5.0 (X11; Linux; rv:74.0) Gecko/20100101 Firefox/74.0'}
def fetch_page(url, timeout=30):
    """Download *url* and return the response body as text.

    The request is sent with the module-level ``HEADERS`` and a ``CONSENT``
    cookie. The body is decoded with ``bytes.decode()``'s default encoding
    (UTF-8 on Python 3), regardless of the charset the server declares.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The decoded page, or ``None`` if the request, the HTTP status check
        or the decoding failed (the error is printed, not raised).
    """
    cookie_jar = requests.cookies.RequestsCookieJar()
    # Set the CONSENT cookie to an arbitrary value to mimic a client that has
    # already accepted the cookie prompt.
    cookie_jar.set('CONSENT', 'YES+cb.202110101-12-p0.en+FX+035')
    try:
        req = requests.get(
            url,
            headers=HEADERS,
            cookies=cookie_jar,
            timeout=timeout,
        )
        req.raise_for_status()
        return req.content.decode()
    except Exception as e:
        # Best-effort contract: report the problem and signal failure with
        # None instead of propagating the exception to the caller.
        err_msg = 'Error while fetching page: "{}": {}'.format(url, e)
        print(err_msg)
        return None
def _store_file(data, cache_file):
    """Write the text *data* to *cache_file* as encoded bytes.

    Args:
        data: Text to store; encoded with ``str.encode()``'s default encoding.
        cache_file: Path of the file to create or overwrite.

    Raises:
        AttributeError: If *data* has no ``encode`` method (e.g. ``None``).
            The existing file is left untouched in that case.
    """
    # Encode BEFORE opening: opening with 'wb' truncates the file, so a
    # failing encode must not be allowed to wipe a previously stored page.
    payload = data.encode()
    with io.open(cache_file, 'wb') as f:
        f.write(payload)
def _read_file(cache_file):
    """Return the contents of *cache_file* decoded to text.

    The file is read in binary mode and the bytes are decoded with
    ``bytes.decode()``'s default encoding, so line endings are returned
    exactly as stored.
    """
    with io.open(cache_file, mode='rb') as stream:
        raw_bytes = stream.read()
    return raw_bytes.decode()
def get_page(url, cache_file):
    """Return the page at *url*, using a file cache under ``FILE_PREFIX``.

    A cached copy younger than ``EXPIRY`` seconds (by file modification time)
    is returned without touching the network. Otherwise the page is fetched
    with ``fetch_page`` and, on success, written to the cache.

    Args:
        url: Address of the page to retrieve.
        cache_file: File name of the cache entry; appended to ``FILE_PREFIX``.

    Returns:
        The page text. If the fetch fails, the expired cached copy is
        returned when one exists, otherwise ``None``.

    Raises:
        OSError: If the cache file's modification time cannot be read for any
            reason other than the file not existing.
    """
    full_path = FILE_PREFIX + cache_file
    try:
        mtime = os.path.getmtime(full_path)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise
        # No cache entry yet: fall through to a fresh fetch.
        mtime = None

    # Cache hit: the entry exists and has not expired.
    if mtime is not None and mtime + EXPIRY >= time.time():
        return _read_file(full_path)

    page = fetch_page(url)
    if page is None:
        # fetch_page signals failure with None. Never store that: it would
        # crash in _store_file. Serve the stale copy if there is one.
        if mtime is not None:
            return _read_file(full_path)
        return None

    _store_file(page, full_path)
    return page