Fix fitz camelCase deprecation and .PDF not being recognized as pdf file (#10181)

* Fix fitz camelCase deprecation and .PDF not being recognized as pdf file

* refactor get_image_file_list function
pull/11092/head
itasli 2023-07-07 08:13:32 +00:00 committed by tangshiyu
parent 1fbefbbf84
commit ff95c05589
1 changed file with 10 additions and 21 deletions

View File

@@ -59,33 +59,22 @@ def _check_image_file(path):
def get_image_file_list(img_file, infer_list=None):
imgs_lists = []
if infer_list and not os.path.exists(infer_list):
raise Exception("not found infer list {}".format(infer_list))
if infer_list:
with open(infer_list, "r") as f:
lines = f.readlines()
for line in lines:
image_path = line.strip().split("\t")[0]
image_path = os.path.join(img_file, image_path)
imgs_lists.append(image_path)
else:
if img_file is None or not os.path.exists(img_file):
raise Exception("not found any img file in {}".format(img_file))
if img_file is None or not os.path.exists(img_file):
raise Exception("not found any img file in {}".format(img_file))
if os.path.isfile(img_file) and _check_image_file(img_file):
imgs_lists.append(img_file)
elif os.path.isdir(img_file):
for single_file in os.listdir(img_file):
file_path = os.path.join(img_file, single_file)
if os.path.isfile(file_path) and _check_image_file(file_path):
imgs_lists.append(file_path)
img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif', 'pdf'}
if os.path.isfile(img_file) and _check_image_file(img_file):
imgs_lists.append(img_file)
elif os.path.isdir(img_file):
for single_file in os.listdir(img_file):
file_path = os.path.join(img_file, single_file)
if os.path.isfile(file_path) and _check_image_file(file_path):
imgs_lists.append(file_path)
if len(imgs_lists) == 0:
raise Exception("not found any img file in {}".format(img_file))
imgs_lists = sorted(imgs_lists)
return imgs_lists
def binarize_img(img):
if len(img.shape) == 3 and img.shape[2] == 3:
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # conversion to grayscale image