Replace os.system('unzip file.zip') -> ZipFile.extractall() (#4919)

* Replace `os.system('unzip file.zip')` -> `ZipFile.extractall()`

* Cleanup
This commit is contained in:
Glenn Jocher 2021-09-25 08:52:36 -07:00 committed by GitHub
parent 1492632796
commit a64a4c839f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 16 additions and 12 deletions

View File

@@ -15,6 +15,7 @@ from itertools import repeat
from multiprocessing.pool import ThreadPool, Pool from multiprocessing.pool import ThreadPool, Pool
from pathlib import Path from pathlib import Path
from threading import Thread from threading import Thread
from zipfile import ZipFile
import cv2 import cv2
import numpy as np import numpy as np
@@ -928,8 +929,8 @@ def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profil
# Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/' # Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/'
if str(path).endswith('.zip'): # path is data.zip if str(path).endswith('.zip'): # path is data.zip
assert Path(path).is_file(), f'Error unzipping {path}, file not found' assert Path(path).is_file(), f'Error unzipping {path}, file not found'
assert os.system(f'unzip -q {path} -d {path.parent}') == 0, f'Error unzipping {path}' ZipFile(path).extractall(path=path.parent) # unzip
dir = path.with_suffix('') # dataset directory dir = path.with_suffix('') # dataset directory == zip name
return True, str(dir), next(dir.rglob('*.yaml')) # zipped, data_dir, yaml_path return True, str(dir), next(dir.rglob('*.yaml')) # zipped, data_dir, yaml_path
else: # path is data.yaml else: # path is data.yaml
return False, None, path return False, None, path

View File

@@ -9,6 +9,7 @@ import subprocess
import time import time
import urllib import urllib
from pathlib import Path from pathlib import Path
from zipfile import ZipFile
import requests import requests
import torch import torch
@@ -104,8 +105,8 @@ def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
# Unzip if archive # Unzip if archive
if file.suffix == '.zip': if file.suffix == '.zip':
print('unzipping... ', end='') print('unzipping... ', end='')
os.system(f'unzip -q {file}') # unzip ZipFile(file).extractall(path=file.parent) # unzip
file.unlink() # remove zip to free space file.unlink() # remove zip
print(f'Done ({time.time() - t:.1f}s)') print(f'Done ({time.time() - t:.1f}s)')
return r return r

View File

@@ -18,6 +18,7 @@ from itertools import repeat
from multiprocessing.pool import ThreadPool from multiprocessing.pool import ThreadPool
from pathlib import Path from pathlib import Path
from subprocess import check_output from subprocess import check_output
from zipfile import ZipFile
import cv2 import cv2
import numpy as np import numpy as np
@@ -353,17 +354,19 @@ def check_dataset(data, autodownload=True):
if s and autodownload: # download script if s and autodownload: # download script
if s.startswith('http') and s.endswith('.zip'): # URL if s.startswith('http') and s.endswith('.zip'): # URL
f = Path(s).name # filename f = Path(s).name # filename
print(f'Downloading {s} ...') print(f'Downloading {s} to {f}...')
torch.hub.download_url_to_file(s, f) torch.hub.download_url_to_file(s, f)
root = path.parent if 'path' in data else '..' # unzip directory i.e. '../' root = path.parent if 'path' in data else '..' # unzip directory i.e. '../'
Path(root).mkdir(parents=True, exist_ok=True) # create root Path(root).mkdir(parents=True, exist_ok=True) # create root
r = os.system(f'unzip -q {f} -d {root} && rm {f}') # unzip ZipFile(f).extractall(path=root) # unzip
Path(f).unlink() # remove zip
r = None # success
elif s.startswith('bash '): # bash script elif s.startswith('bash '): # bash script
print(f'Running {s} ...') print(f'Running {s} ...')
r = os.system(s) r = os.system(s)
else: # python script else: # python script
r = exec(s, {'yaml': data}) # return None r = exec(s, {'yaml': data}) # return None
print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure')) # print result print(f"Dataset autodownload {f'success, saved to {root}' if r in (0, None) else 'failure'}")
else: else:
raise Exception('Dataset not found.') raise Exception('Dataset not found.')
@@ -393,12 +396,11 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
if unzip and f.suffix in ('.zip', '.gz'): if unzip and f.suffix in ('.zip', '.gz'):
print(f'Unzipping {f}...') print(f'Unzipping {f}...')
if f.suffix == '.zip': if f.suffix == '.zip':
s = f'unzip -qo {f} -d {dir}' # unzip -quiet -overwrite ZipFile(f).extractall(path=dir) # unzip
elif f.suffix == '.gz': elif f.suffix == '.gz':
s = f'tar xfz {f} --directory {f.parent}' # unzip os.system(f'tar xfz {f} --directory {f.parent}') # unzip
if delete: # delete zip file after unzip if delete:
s += f' && rm {f}' f.unlink() # remove zip
os.system(s)
dir = Path(dir) dir = Path(dir)
dir.mkdir(parents=True, exist_ok=True) # make directory dir.mkdir(parents=True, exist_ok=True) # make directory