Pobierz plik gzip w Pythonie
def download_dataset(url, target_path="data/", keep_download=True, overwrite_download=False):
    """Download a dataset file from a URL and extract it.

    Args:
        url: string, location of the dataset to download.
        target_path: string, directory the file is downloaded into. It is
            created if it does not exist yet.
        keep_download: boolean, when False the downloaded file is removed
            after extraction. It is not applied when the download is
            skipped because the file already exists.
        overwrite_download: boolean, when False and the file is already
            present at its target location, the download is skipped and the
            existing file is extracted instead.

    Raises:
        Exception: if ``url`` is empty or None.
        requests.HTTPError: if the server responds with an error status.
    """
    if not url:
        raise Exception(EMPTY_URL_ERROR)

    filename = get_filename(url)
    file_location = get_file_location(target_path, filename)

    # The download is written under target_path, so that is the directory
    # that has to exist before the file is opened for writing.
    os.makedirs(target_path, exist_ok=True)

    if os.path.exists(file_location) and not overwrite_download:
        print(
            f"File already exists at {file_location}. "
            "Use: 'overwrite_download=True' to overwrite download"
        )
        extract_file(target_path, filename)
        return

    print(f"Downloading file from {url} to {file_location}.")
    # Open the connection and check the HTTP status *before* creating the
    # output file: a failed request must not leave an empty file behind,
    # because a later call would mistake it for a finished download.
    with requests.get(url, allow_redirects=True, stream=True) as resp:
        resp.raise_for_status()
        with open(file_location, "wb") as f:
            for chunk in resp.iter_content(chunk_size=512):  # chunk_size in bytes
                if chunk:  # skip empty keep-alive chunks
                    f.write(chunk)
    print("Finished downloading.")

    print("Extracting the file now ...")
    extract_file(target_path, filename)
    if not keep_download:
        os.remove(file_location)
Mohamed Naji