Update ZipFile to context manager (#9843)

* Update ZipFile to context manager

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Cleanup

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Cleanup

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Glenn Jocher 2022-10-18 16:35:04 +02:00 committed by GitHub
parent 010cd0db7d
commit d0df6c8403
3 changed files with 15 additions and 86 deletions
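
Background on the change itself: ZipFile(path).extractall(...) never explicitly closes the archive, so the file handle stays open until garbage collection (and can keep the zip locked on Windows). The context-manager form this commit standardizes on closes it deterministically. A minimal sketch of the two patterns:

    from zipfile import ZipFile

    # Before: handle remains open until the ZipFile object is garbage-collected
    ZipFile('data.zip').extractall(path='data')

    # After: the with-block closes the archive as soon as extraction finishes
    with ZipFile('data.zip') as zipObj:
        zipObj.extractall(path='data')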

utils/dataloaders.py

@@ -17,7 +17,6 @@ from multiprocessing.pool import Pool, ThreadPool
 from pathlib import Path
 from threading import Thread
 from urllib.parse import urlparse
-from zipfile import ZipFile

 import numpy as np
 import torch
@@ -31,7 +30,8 @@ from tqdm import tqdm
 from utils.augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste,
                                  cutout, letterbox, mixup, random_perspective)
 from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str,
-                           cv2, is_colab, is_kaggle, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
+                           cv2, is_colab, is_kaggle, segments2boxes, unzip_file, xyn2xy, xywh2xyxy, xywhn2xyxy,
+                           xyxy2xywhn)
 from utils.torch_utils import torch_distributed_zero_first

 # Parameters
@@ -1053,7 +1053,7 @@ class HUBDatasetStats():
         if not str(path).endswith('.zip'):  # path is data.yaml
             return False, None, path
         assert Path(path).is_file(), f'Error unzipping {path}, file not found'
-        ZipFile(path).extractall(path=path.parent)  # unzip
+        unzip_file(path, path=path.parent)
         dir = path.with_suffix('')  # dataset directory == zip name
         assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/'
         return True, str(dir), self._find_yaml(dir)  # zipped, data_dir, yaml_path
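
For orientation, the call above lives in HUBDatasetStats._unzip, which runs when the class receives a *.zip rather than a data.yaml. A hedged usage sketch, with the entry points as given in the class's own usage docstring and the path purely illustrative:

    from utils.dataloaders import HUBDatasetStats

    stats = HUBDatasetStats('path/to/coco128.zip')  # a .zip argument routes through _unzip() -> unzip_file()
    stats.get_json(save=False)  # compute dataset statistics JSON
    stats.process_images()      # compress images for Ultralytics HUB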

utils/downloads.py

@@ -5,12 +5,9 @@ Download utils
 import logging
 import os
-import platform
 import subprocess
-import time
 import urllib
 from pathlib import Path
-from zipfile import ZipFile

 import requests
 import torch
@@ -109,81 +106,3 @@ def attempt_download(file, repo='ultralytics/yolov5', release='v6.2'):
                       error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}')

     return str(file)
-
-
-def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
-    # Downloads a file from Google Drive. from yolov5.utils.downloads import *; gdrive_download()
-    t = time.time()
-    file = Path(file)
-    cookie = Path('cookie')  # gdrive cookie
-    print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='')
-    if file.exists():
-        file.unlink()  # remove existing file
-    if cookie.exists():
-        cookie.unlink()  # remove existing cookie
-
-    # Attempt file download
-    out = "NUL" if platform.system() == "Windows" else "/dev/null"
-    os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}')
-    if os.path.exists('cookie'):  # large file
-        s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}'
-    else:  # small file
-        s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"'
-    r = os.system(s)  # execute, capture return
-    if cookie.exists():
-        cookie.unlink()  # remove existing cookie
-
-    # Error check
-    if r != 0:
-        if file.exists():
-            file.unlink()  # remove partial
-        print('Download error ')  # raise Exception('Download error')
-        return r
-
-    # Unzip if archive
-    if file.suffix == '.zip':
-        print('unzipping... ', end='')
-        ZipFile(file).extractall(path=file.parent)  # unzip
-        file.unlink()  # remove zip
-
-    print(f'Done ({time.time() - t:.1f}s)')
-    return r
-
-
-def get_token(cookie="./cookie"):
-    with open(cookie) as f:
-        for line in f:
-            if "download" in line:
-                return line.split()[-1]
-    return ""
-
-
-# Google utils: https://cloud.google.com/storage/docs/reference/libraries ----------------------------------------------
-#
-#
-# def upload_blob(bucket_name, source_file_name, destination_blob_name):
-#     # Uploads a file to a bucket
-#     # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
-#
-#     storage_client = storage.Client()
-#     bucket = storage_client.get_bucket(bucket_name)
-#     blob = bucket.blob(destination_blob_name)
-#
-#     blob.upload_from_filename(source_file_name)
-#
-#     print('File {} uploaded to {}.'.format(
-#         source_file_name,
-#         destination_blob_name))
-#
-#
-# def download_blob(bucket_name, source_blob_name, destination_file_name):
-#     # Uploads a blob from a bucket
-#     storage_client = storage.Client()
-#     bucket = storage_client.get_bucket(bucket_name)
-#     blob = bucket.blob(source_blob_name)
-#
-#     blob.download_to_filename(destination_file_name)
-#
-#     print('Blob {} downloaded to {}.'.format(
-#         source_blob_name,
-#         destination_file_name))

utils/general.py

@@ -511,7 +511,7 @@ def check_dataset(data, autodownload=True):
                 LOGGER.info(f'Downloading {s} to {f}...')
                 torch.hub.download_url_to_file(s, f)
                 Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True)  # create root
-                ZipFile(f).extractall(path=DATASETS_DIR)  # unzip
+                unzip_file(f, path=DATASETS_DIR)  # unzip
                 Path(f).unlink()  # remove zip
                 r = None  # success
             elif s.startswith('bash '):  # bash script
@@ -566,6 +566,16 @@ def yaml_save(file='data.yaml', data={}):
         yaml.safe_dump({k: str(v) if isinstance(v, Path) else v for k, v in data.items()}, f, sort_keys=False)


+def unzip_file(file, path=None, exclude=('.DS_Store', '__MACOSX')):
+    # Unzip a *.zip file to path/, excluding files containing strings in exclude list
+    if path is None:
+        path = Path(file).parent  # default path
+    with ZipFile(file) as zipObj:
+        for f in zipObj.namelist():  # list all archived filenames in the zip
+            if all(x not in f for x in exclude):
+                zipObj.extract(f, path=path)
+
+
 def url2file(url):
     # Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt
     url = str(Path(url)).replace(':/', '://')  # Pathlib turns :// -> :/
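
A quick usage sketch of the new helper (archive names illustrative). Note that exclude is a plain substring match against each member name, not a glob, and that path defaults to the zip's parent directory:

    from utils.general import unzip_file

    unzip_file('coco128.zip')                    # extract next to the zip (default path)
    unzip_file('coco128.zip', path='datasets')   # extract to an explicit destination
    unzip_file('coco128.zip', exclude=('.DS_Store', '__MACOSX', '.json'))  # also skip members containing '.json'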
@@ -601,7 +611,7 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry
         if unzip and success and f.suffix in ('.zip', '.tar', '.gz'):
             LOGGER.info(f'Unzipping {f}...')
             if f.suffix == '.zip':
-                ZipFile(f).extractall(path=dir)  # unzip
+                unzip_file(f, dir)  # unzip
             elif f.suffix == '.tar':
                 os.system(f'tar xf {f} --directory {f.parent}')  # unzip
             elif f.suffix == '.gz':
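
One side note: the .tar and .gz branches above still shell out to tar via os.system. This commit does not touch them, but a pure-stdlib equivalent using tarfile would be a natural follow-up. A minimal sketch, with untar_file a hypothetical name not present in the codebase:

    import tarfile
    from pathlib import Path

    def untar_file(file, path=None):
        # Extract a .tar or compressed .tar.* archive to path/ (defaults to the archive's parent)
        file = Path(file)
        path = path or file.parent
        with tarfile.open(file) as tar:  # mode 'r' auto-detects gzip/bz2/xz compression
            tar.extractall(path=path)  # NOTE: trusts archive contents; filter members for untrusted sources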