Update ZipFile to context manager (#9843)

* Update ZipFile to context manager

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Cleanup

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Cleanup

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Glenn Jocher 2022-10-18 16:35:04 +02:00 committed by GitHub
parent 010cd0db7d
commit d0df6c8403
3 changed files with 15 additions and 86 deletions
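
Background on the change itself: ZipFile(path).extractall(...) never explicitly closes the archive, so the file handle stays open until garbage collection (and can keep the zip locked on Windows). The context-manager form this commit standardizes on closes it deterministically. A minimal sketch of the two patterns:

    from zipfile import ZipFile

    # Before: handle remains open until the ZipFile object is garbage-collected
    ZipFile('data.zip').extractall(path='data')

    # After: the with-block closes the archive as soon as extraction finishes
    with ZipFile('data.zip') as zipObj:
        zipObj.extractall(path='data')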

utils/dataloaders.py

@@ -17,7 +17,6 @@ from multiprocessing.pool import Pool, ThreadPool
 from pathlib import Path
 from threading import Thread
 from urllib.parse import urlparse
-from zipfile import ZipFile

 import numpy as np
 import torch
@@ -31,7 +30,8 @@ from tqdm import tqdm
 from utils.augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste,
                                  cutout, letterbox, mixup, random_perspective)
 from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str,
-                           cv2, is_colab, is_kaggle, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
+                           cv2, is_colab, is_kaggle, segments2boxes, unzip_file, xyn2xy, xywh2xyxy, xywhn2xyxy,
+                           xyxy2xywhn)
 from utils.torch_utils import torch_distributed_zero_first

 # Parameters
@@ -1053,7 +1053,7 @@ class HUBDatasetStats():
         if not str(path).endswith('.zip'):  # path is data.yaml
             return False, None, path
         assert Path(path).is_file(), f'Error unzipping {path}, file not found'
-        ZipFile(path).extractall(path=path.parent)  # unzip
+        unzip_file(path, path=path.parent)
         dir = path.with_suffix('')  # dataset directory == zip name
         assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/'
         return True, str(dir), self._find_yaml(dir)  # zipped, data_dir, yaml_path
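
For orientation, the call above lives in HUBDatasetStats._unzip, which runs when the class receives a *.zip rather than a data.yaml. A hedged usage sketch, with the entry points as given in the class's own usage docstring and the path purely illustrative:

    from utils.dataloaders import HUBDatasetStats

    stats = HUBDatasetStats('path/to/coco128.zip')  # a .zip argument routes through _unzip() -> unzip_file()
    stats.get_json(save=False)  # compute dataset statistics JSON
    stats.process_images()      # compress images for Ultralytics HUB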

utils/downloads.py

@@ -5,12 +5,9 @@ Download utils
 import logging
 import os
-import platform
 import subprocess
-import time
 import urllib
 from pathlib import Path
-from zipfile import ZipFile

 import requests
 import torch
@@ -109,81 +106,3 @@ def attempt_download(file, repo='ultralytics/yolov5', release='v6.2'):
                       error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}')

     return str(file)
-
-
-def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
-    # Downloads a file from Google Drive. from yolov5.utils.downloads import *; gdrive_download()
-    t = time.time()
-    file = Path(file)
-    cookie = Path('cookie')  # gdrive cookie
-    print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='')
-    if file.exists():
-        file.unlink()  # remove existing file
-    if cookie.exists():
-        cookie.unlink()  # remove existing cookie
-
-    # Attempt file download
-    out = "NUL" if platform.system() == "Windows" else "/dev/null"
-    os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}')
-    if os.path.exists('cookie'):  # large file
-        s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}'
-    else:  # small file
-        s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"'
-    r = os.system(s)  # execute, capture return
-    if cookie.exists():
-        cookie.unlink()  # remove existing cookie
-
-    # Error check
-    if r != 0:
-        if file.exists():
-            file.unlink()  # remove partial
-        print('Download error ')  # raise Exception('Download error')
-        return r
-
-    # Unzip if archive
-    if file.suffix == '.zip':
-        print('unzipping... ', end='')
-        ZipFile(file).extractall(path=file.parent)  # unzip
-        file.unlink()  # remove zip
-
-    print(f'Done ({time.time() - t:.1f}s)')
-    return r
-
-
-def get_token(cookie="./cookie"):
-    with open(cookie) as f:
-        for line in f:
-            if "download" in line:
-                return line.split()[-1]
-    return ""
-
-
-# Google utils: https://cloud.google.com/storage/docs/reference/libraries ----------------------------------------------
-#
-#
-# def upload_blob(bucket_name, source_file_name, destination_blob_name):
-#     # Uploads a file to a bucket
-#     # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
-#
-#     storage_client = storage.Client()
-#     bucket = storage_client.get_bucket(bucket_name)
-#     blob = bucket.blob(destination_blob_name)
-#
-#     blob.upload_from_filename(source_file_name)
-#
-#     print('File {} uploaded to {}.'.format(
-#         source_file_name,
-#         destination_blob_name))
-#
-#
-# def download_blob(bucket_name, source_blob_name, destination_file_name):
-#     # Uploads a blob from a bucket
-#     storage_client = storage.Client()
-#     bucket = storage_client.get_bucket(bucket_name)
-#     blob = bucket.blob(source_blob_name)
-#
-#     blob.download_to_filename(destination_file_name)
-#
-#     print('Blob {} downloaded to {}.'.format(
-#         source_blob_name,
-#         destination_file_name))

utils/general.py

@@ -511,7 +511,7 @@ def check_dataset(data, autodownload=True):
                 LOGGER.info(f'Downloading {s} to {f}...')
                 torch.hub.download_url_to_file(s, f)
                 Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True)  # create root
-                ZipFile(f).extractall(path=DATASETS_DIR)  # unzip
+                unzip_file(f, path=DATASETS_DIR)  # unzip
                 Path(f).unlink()  # remove zip
                 r = None  # success
             elif s.startswith('bash '):  # bash script
@@ -566,6 +566,16 @@ def yaml_save(file='data.yaml', data={}):
         yaml.safe_dump({k: str(v) if isinstance(v, Path) else v for k, v in data.items()}, f, sort_keys=False)


+def unzip_file(file, path=None, exclude=('.DS_Store', '__MACOSX')):
+    # Unzip a *.zip file to path/, excluding files containing strings in exclude list
+    if path is None:
+        path = Path(file).parent  # default path
+    with ZipFile(file) as zipObj:
+        for f in zipObj.namelist():  # list all archived filenames in the zip
+            if all(x not in f for x in exclude):
+                zipObj.extract(f, path=path)
+
+
 def url2file(url):
     # Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt
     url = str(Path(url)).replace(':/', '://')  # Pathlib turns :// -> :/
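
A quick usage sketch of the new helper (archive names illustrative). Note that exclude is a plain substring match against each member name, not a glob, and that path defaults to the zip's parent directory:

    from utils.general import unzip_file

    unzip_file('coco128.zip')                    # extract next to the zip (default path)
    unzip_file('coco128.zip', path='datasets')   # extract to an explicit destination
    unzip_file('coco128.zip', exclude=('.DS_Store', '__MACOSX', '.json'))  # also skip members containing '.json'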
@@ -601,7 +611,7 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry
         if unzip and success and f.suffix in ('.zip', '.tar', '.gz'):
             LOGGER.info(f'Unzipping {f}...')
             if f.suffix == '.zip':
-                ZipFile(f).extractall(path=dir)  # unzip
+                unzip_file(f, dir)  # unzip
             elif f.suffix == '.tar':
                 os.system(f'tar xf {f} --directory {f.parent}')  # unzip
             elif f.suffix == '.gz':
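
One side note: the .tar and .gz branches above still shell out to tar via os.system. This commit does not touch them, but a pure-stdlib equivalent using tarfile would be a natural follow-up. A minimal sketch, with untar_file a hypothetical name not present in the codebase:

    import tarfile
    from pathlib import Path

    def untar_file(file, path=None):
        # Extract a .tar or compressed .tar.* archive to path/ (defaults to the archive's parent)
        file = Path(file)
        path = path or file.parent
        with tarfile.open(file) as tar:  # mode 'r' auto-detects gzip/bz2/xz compression
            tar.extractall(path=path)  # NOTE: trusts archive contents; filter members for untrusted sources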