# Google utils: https://cloud.google.com/storage/docs/reference/libraries

import os
import platform
import subprocess
import time
from pathlib import Path

import requests
import torch


def gsutil_getsize(url=''):
    # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du
    s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8')
    return eval(s.split(' ')[0]) if len(s) else 0  # bytes
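
# A minimal usage sketch for gsutil_getsize. It shells out to `gsutil du`, so it assumes the Google
# Cloud SDK is installed and authenticated; the bucket path below is purely illustrative.
#   size = gsutil_getsize('gs://my-bucket/weights/yolov5s.pt')  # size in bytes, 0 if the object is missing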


def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
    # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
    file = Path(file)
    try:  # GitHub
        print(f'Downloading {url} to {file}...')
        torch.hub.download_url_to_file(url, str(file))
        assert file.exists() and file.stat().st_size > min_bytes  # check
    except Exception as e:  # GCP
        file.unlink(missing_ok=True)  # remove partial downloads
        print(f'Download error: {e}\nRe-attempting {url2 or url} to {file}...')
        os.system(f"curl -L '{url2 or url}' -o '{file}' --retry 3 -C -")  # curl download, retry and resume on fail
    finally:
        if not file.exists() or file.stat().st_size < min_bytes:  # check
            file.unlink(missing_ok=True)  # remove partial downloads
            print(f'ERROR: Download failure: {error_msg or url}')
        print('')
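
# A minimal usage sketch for safe_download (illustrative; the release URL follows the pattern built
# in attempt_download below, and the error message is only an example placeholder):
#   safe_download(file='yolov5s.pt',
#                 url='https://github.com/ultralytics/yolov5/releases/download/v5.0/yolov5s.pt',
#                 min_bytes=1E5,
#                 error_msg='yolov5s.pt missing, try downloading it manually from the releases page')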


def attempt_download(file, repo='ultralytics/yolov5'):
    # Attempt file download if it does not exist locally
    file = Path(str(file).strip().replace("'", ''))

    if not file.exists():
        # URL specified
        name = file.name
        if str(file).startswith(('http:/', 'https:/')):  # download
            url = str(file).replace(':/', '://')  # Pathlib turns :// -> :/
            safe_download(file=name, url=url, min_bytes=1E5)
            return name

        # GitHub assets
        file.parent.mkdir(parents=True, exist_ok=True)  # make parent dir (if required)
        try:
            response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json()  # github api
            assets = [x['name'] for x in response['assets']]  # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...]
            tag = response['tag_name']  # i.e. 'v1.0'
        except:  # fallback plan
            assets = ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt',
                      'yolov5s6.pt', 'yolov5m6.pt', 'yolov5l6.pt', 'yolov5x6.pt']
            try:
                tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1]
            except:
                tag = 'v5.0'  # current release

        if name in assets:
            safe_download(file,
                          url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
                          # url2=f'https://storage.googleapis.com/{repo}/ckpt/{name}',  # backup url (optional)
                          min_bytes=1E5,
                          error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/')

    return str(file)
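
# A minimal usage sketch for attempt_download (hedged example; 'yolov5s.pt' is one of the release
# assets listed above, and a full URL is also accepted and handed straight to safe_download):
#   weights = attempt_download('yolov5s.pt')  # fetch from the latest GitHub release if not present locally
#   weights = attempt_download('https://github.com/ultralytics/yolov5/releases/download/v5.0/yolov5s.pt')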


def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
    # Downloads a file from Google Drive. from yolov5.utils.google_utils import *; gdrive_download()
    t = time.time()
    file = Path(file)
    cookie = Path('cookie')  # gdrive cookie
    print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='')
    file.unlink(missing_ok=True)  # remove existing file
    cookie.unlink(missing_ok=True)  # remove existing cookie

    # Attempt file download
    out = "NUL" if platform.system() == "Windows" else "/dev/null"
    os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}')
    if os.path.exists('cookie'):  # large file
        s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}'
    else:  # small file
        s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"'
    r = os.system(s)  # execute, capture return
    cookie.unlink(missing_ok=True)  # remove existing cookie

    # Error check
    if r != 0:
        file.unlink(missing_ok=True)  # remove partial
        print('Download error ')  # raise Exception('Download error')
        return r

    # Unzip if archive
    if file.suffix == '.zip':
        print('unzipping... ', end='')
        os.system(f'unzip -q {file}')  # unzip
        file.unlink()  # remove zip to free space

    print(f'Done ({time.time() - t:.1f}s)')
    return r
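
# A minimal usage sketch for gdrive_download (the file ID below is the function's own default;
# any other ID is assumed to belong to a publicly shared Drive file):
#   r = gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip')  # returns 0 on success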


def get_token(cookie="./cookie"):
    with open(cookie) as f:
        for line in f:
            if "download" in line:
                return line.split()[-1]
    return ""


# def upload_blob(bucket_name, source_file_name, destination_blob_name):
#     # Uploads a file to a bucket
#     # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
#
#     storage_client = storage.Client()
#     bucket = storage_client.get_bucket(bucket_name)
#     blob = bucket.blob(destination_blob_name)
#
#     blob.upload_from_filename(source_file_name)
#
#     print('File {} uploaded to {}.'.format(
#         source_file_name,
#         destination_blob_name))
#
#
# def download_blob(bucket_name, source_blob_name, destination_file_name):
#     # Downloads a blob from a bucket
#     storage_client = storage.Client()
#     bucket = storage_client.get_bucket(bucket_name)
#     blob = bucket.blob(source_blob_name)
#
#     blob.download_to_filename(destination_file_name)
#
#     print('Blob {} downloaded to {}.'.format(
#         source_blob_name,
#         destination_file_name))