Progress Bar while download file over http with Requests
It seems that there is a disconnect between the examples on the Progress Bar Usage page and what the code actually requires.
In the following example, note the use of maxval
instead of max_value
. Also note the use of .start()
to initialized the bar. This has been noted in an Issue.
The n_chunk
parameter denotes how many 1024 kb chunks to stream at once while looping through the request iterator.
import requests
import time
import numpy as np
import progressbar
url = "http://wikipedia.com/"
def download_file(url, n_chunk=1):
r = requests.get(url, stream=True)
# Estimates the number of bar updates
block_size = 1024
file_size = int(r.headers.get('Content-Length', None))
num_bars = np.ceil(file_size / (n_chunk * block_size))
bar = progressbar.ProgressBar(maxval=num_bars).start()
with open('test.html', 'wb') as f:
for i, chunk in enumerate(r.iter_content(chunk_size=n_chunk * block_size)):
f.write(chunk)
bar.update(i+1)
# Add a little sleep so you can see the bar progress
time.sleep(0.05)
return
download_file(url)
EDIT: Addressed comment about code clarity.
EDIT2: Fixed logic so bar reports 100% at completion. Credit to leovp's answer for using the 1024 kb block size.
The tqdm
package now includes a function designed more specifically for this type of situation: wrapattr
. You just wrap an object's read
(or write
) attribute, and tqdm handles the rest; there's no messing with block sizes or anything like that. Here's a simple download function that puts it all together with requests
:
def download(url, filename):
import functools
import pathlib
import shutil
import requests
from tqdm.auto import tqdm
r = requests.get(url, stream=True, allow_redirects=True)
if r.status_code != 200:
r.raise_for_status() # Will only raise for 4xx codes, so...
raise RuntimeError(f"Request to {url} returned status code {r.status_code}")
file_size = int(r.headers.get('Content-Length', 0))
path = pathlib.Path(filename).expanduser().resolve()
path.parent.mkdir(parents=True, exist_ok=True)
desc = "(Unknown total file size)" if file_size == 0 else ""
r.raw.read = functools.partial(r.raw.read, decode_content=True) # Decompress if needed
with tqdm.wrapattr(r.raw, "read", total=file_size, desc=desc) as r_raw:
with path.open("wb") as f:
shutil.copyfileobj(r_raw, f)
return path
I suggest you try tqdm
, it's very easy to use.
Example code for downloading with requests
library:
from tqdm import tqdm
import requests
url = "http://www.ovh.net/files/10Mb.dat" #big file test
# Streaming, so we can iterate over the response.
response = requests.get(url, stream=True)
total_size_in_bytes= int(response.headers.get('content-length', 0))
block_size = 1024 #1 Kibibyte
progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
with open('test.dat', 'wb') as file:
for data in response.iter_content(block_size):
progress_bar.update(len(data))
file.write(data)
progress_bar.close()
if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
print("ERROR, something went wrong")