Fix fitz camelCase deprecation and .PDF not being recognized as pdf file (#10181)

* Fix fitz camelCase deprecation and .PDF not being recognized as pdf file
* refactor get_image_file_list function
2023-07-07 08:13:32 +00:00 · 2023-07-07 08:13:32 +00:00 · ff95c05589
parent 1fbefbbf84
commit ff95c05589
1 changed files with 10 additions and 21 deletions
--- a/ppocr/utils/utility.py
+++ b/ppocr/utils/utility.py
@@ -59,33 +59,22 @@ def _check_image_file(path):

 def get_image_file_list(img_file, infer_list=None):
    imgs_lists = []
-    if infer_list and not os.path.exists(infer_list):
-        raise Exception("not found infer list {}".format(infer_list))
-    if infer_list:
-        with open(infer_list, "r") as f:
-            lines = f.readlines()
-        for line in lines:
-            image_path = line.strip().split("\t")[0]
-            image_path = os.path.join(img_file, image_path)
-            imgs_lists.append(image_path)
-    else:
-        if img_file is None or not os.path.exists(img_file):
-            raise Exception("not found any img file in {}".format(img_file))
+    if img_file is None or not os.path.exists(img_file):
+        raise Exception("not found any img file in {}".format(img_file))
+
+    if os.path.isfile(img_file) and _check_image_file(img_file):
+        imgs_lists.append(img_file)
+    elif os.path.isdir(img_file):
+        for single_file in os.listdir(img_file):
+            file_path = os.path.join(img_file, single_file)
+            if os.path.isfile(file_path) and _check_image_file(file_path):
+                imgs_lists.append(file_path)

-        img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif', 'pdf'}
-        if os.path.isfile(img_file) and _check_image_file(img_file):
-            imgs_lists.append(img_file)
-        elif os.path.isdir(img_file):
-            for single_file in os.listdir(img_file):
-                file_path = os.path.join(img_file, single_file)
-                if os.path.isfile(file_path) and _check_image_file(file_path):
-                    imgs_lists.append(file_path)
    if len(imgs_lists) == 0:
        raise Exception("not found any img file in {}".format(img_file))
    imgs_lists = sorted(imgs_lists)
    return imgs_lists

-
 def binarize_img(img):
    if len(img.shape) == 3 and img.shape[2] == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # conversion to grayscale image