Multi-threaded image caching

This commit is contained in:
Glenn Jocher 2020-11-07 02:18:18 +01:00
parent ea7e78cb11
commit 194f16844e
3 changed files with 8 additions and 6 deletions

View File

@ -25,7 +25,7 @@ end=$(date +%s)
runtime=$((end - start)) runtime=$((end - start))
echo "Completed in" $runtime "seconds" echo "Completed in" $runtime "seconds"
echo "Spliting dataset..." echo "Splitting dataset..."
python3 - "$@" <<END python3 - "$@" <<END
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import pickle import pickle

View File

@ -20,7 +20,6 @@ tqdm>=4.41.0
# pycocotools>=2.0 # pycocotools>=2.0
# export -------------------------------------- # export --------------------------------------
# packaging # for coremltools
# coremltools==4.0 # coremltools==4.0
# onnx>=1.7.0 # onnx>=1.7.0
# scikit-learn==0.19.2 # for coreml quantization # scikit-learn==0.19.2 # for coreml quantization

View File

@ -1,13 +1,15 @@
import glob import glob
import math
import os import os
import random import random
import shutil import shutil
import time import time
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path from pathlib import Path
from threading import Thread from threading import Thread
import cv2 import cv2
import math
import numpy as np import numpy as np
import torch import torch
from PIL import Image, ExifTags from PIL import Image, ExifTags
@ -474,10 +476,11 @@ class LoadImagesAndLabels(Dataset): # for training/testing
self.imgs = [None] * n self.imgs = [None] * n
if cache_images: if cache_images:
gb = 0 # Gigabytes of cached images gb = 0 # Gigabytes of cached images
pbar = tqdm(range(len(self.img_files)), desc='Caching images')
self.img_hw0, self.img_hw = [None] * n, [None] * n self.img_hw0, self.img_hw = [None] * n, [None] * n
for i in pbar: # max 10k images results = ThreadPool(8).imap_unordered(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads
self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i) # img, hw_original, hw_resized pbar = tqdm(enumerate(results), total=n)
for i, x in pbar:
self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i)
gb += self.imgs[i].nbytes gb += self.imgs[i].nbytes
pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9) pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)