Skip to content

Commit 619066b

Browse files
committed
Added session creation and reuse in stream_file
1 parent c9ca07b commit 619066b

File tree

3 files changed

+24
-5
lines changed

3 files changed

+24
-5
lines changed

CHANGELOG.md

+10
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,16 @@ All notable changes to this project are documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) (as of version 1.5.0).
77

8+
## [Unreleased]
9+
10+
### Added
11+
12+
- `download.get_session()` to build a new requests Session
13+
14+
### Changed
15+
16+
- `download.stream_file()` accepts a `session` param to use instead of creating one
17+
818
## [1.7.0] - 2022-08-02
919

1020
### Added

src/zimscraperlib/VERSION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.7.0
1+
1.8.0

src/zimscraperlib/download.py

+13-4
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,13 @@ def _get_retry_adapter(max_retries: Optional[int] = 5) -> requests.adapters.Base
157157
return requests.adapters.HTTPAdapter(max_retries=retries)
158158

159159

160+
def get_session(max_retries: Optional[int] = 5) -> requests.Session:
    """Build a requests Session whose HTTP(S) requests go through a retry adapter

    The Session holds cookies and the connection pool together so that it can
    be reused across several requests.

    Arguments -
        max_retries - Forwarded to `_get_retry_adapter()` to build the adapter
    Returns the newly created Session"""
    session = requests.Session()
    retry_adapter = _get_retry_adapter(max_retries)
    # A fresh Session already has default adapters mounted on "http://" and
    # "https://", and requests picks the adapter with the longest matching
    # prefix. Mounting on the shorter "http" prefix would thus never be
    # selected: the defaults must be replaced explicitly for both schemes.
    for prefix in ("http://", "https://"):
        session.mount(prefix, retry_adapter)
    return session
165+
166+
160167
def stream_file(
161168
url: str,
162169
fpath: Optional[pathlib.Path] = None,
@@ -166,6 +173,7 @@ def stream_file(
166173
only_first_block: Optional[bool] = False,
167174
max_retries: Optional[int] = 5,
168175
headers: Optional[Dict[str, str]] = None,
176+
session: Optional[requests.Session] = None,
169177
) -> Union[int, requests.structures.CaseInsensitiveDict]:
170178
"""Stream data from a URL to either a BytesIO object or a file
171179
Arguments -
@@ -175,16 +183,17 @@ def stream_file(
175183
proxies - A dict of proxies to be used
176184
https://requests.readthedocs.io/en/master/user/advanced/#proxies
177185
only_first_block - Whether to download only one (first) block
178-
max_retries - Maximum number of retries after which error is raised
186+
max_retries - Maximum number of retries after which error is raised. Does not
187+
apply if using your own session
188+
session - Session object to make the request with. A new one created otherwise
179189
Returns the total number of bytes downloaded and the response headers"""
180190

181191
# if no output option is supplied
182192
if fpath is None and byte_stream is None:
183193
raise ValueError("Either file path or a bytesIO object is needed")
184194

185-
session = requests.Session()
186-
retry_adapter = _get_retry_adapter(max_retries)
187-
session.mount("http", retry_adapter) # tied to http and https
195+
if not session:
196+
session = get_session(max_retries)
188197
resp = session.get(
189198
url,
190199
stream=True,

0 commit comments

Comments
 (0)