replace GPL-licensed components
parent
4e9d2e9c40
commit
53d83d8f54
|
@ -12,7 +12,7 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
import Levenshtein
|
from rapidfuzz.distance import Levenshtein
|
||||||
import string
|
import string
|
||||||
|
|
||||||
|
|
||||||
|
@ -45,8 +45,7 @@ class RecMetric(object):
|
||||||
if self.is_filter:
|
if self.is_filter:
|
||||||
pred = self._normalize_text(pred)
|
pred = self._normalize_text(pred)
|
||||||
target = self._normalize_text(target)
|
target = self._normalize_text(target)
|
||||||
norm_edit_dis += Levenshtein.distance(pred, target) / max(
|
norm_edit_dis += Levenshtein.normalized_distance(pred, target)
|
||||||
len(pred), len(target), 1)
|
|
||||||
if pred == target:
|
if pred == target:
|
||||||
correct_num += 1
|
correct_num += 1
|
||||||
all_num += 1
|
all_num += 1
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
# Apache 2.0 License for more details.
|
# Apache 2.0 License for more details.
|
||||||
|
|
||||||
import distance
|
from rapidfuzz.distance import Levenshtein
|
||||||
from apted import APTED, Config
|
from apted import APTED, Config
|
||||||
from apted.helpers import Tree
|
from apted.helpers import Tree
|
||||||
from lxml import etree, html
|
from lxml import etree, html
|
||||||
|
@ -39,17 +39,6 @@ class TableTree(Tree):
|
||||||
|
|
||||||
|
|
||||||
class CustomConfig(Config):
|
class CustomConfig(Config):
|
||||||
@staticmethod
|
|
||||||
def maximum(*sequences):
|
|
||||||
"""Get maximum possible value
|
|
||||||
"""
|
|
||||||
return max(map(len, sequences))
|
|
||||||
|
|
||||||
def normalized_distance(self, *sequences):
|
|
||||||
"""Get distance from 0 to 1
|
|
||||||
"""
|
|
||||||
return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
|
|
||||||
|
|
||||||
def rename(self, node1, node2):
|
def rename(self, node1, node2):
|
||||||
"""Compares attributes of trees"""
|
"""Compares attributes of trees"""
|
||||||
#print(node1.tag)
|
#print(node1.tag)
|
||||||
|
@ -58,23 +47,12 @@ class CustomConfig(Config):
|
||||||
if node1.tag == 'td':
|
if node1.tag == 'td':
|
||||||
if node1.content or node2.content:
|
if node1.content or node2.content:
|
||||||
#print(node1.content, )
|
#print(node1.content, )
|
||||||
return self.normalized_distance(node1.content, node2.content)
|
return Levenshtein.normalized_distance(node1.content, node2.content)
|
||||||
return 0.
|
return 0.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class CustomConfig_del_short(Config):
|
class CustomConfig_del_short(Config):
|
||||||
@staticmethod
|
|
||||||
def maximum(*sequences):
|
|
||||||
"""Get maximum possible value
|
|
||||||
"""
|
|
||||||
return max(map(len, sequences))
|
|
||||||
|
|
||||||
def normalized_distance(self, *sequences):
|
|
||||||
"""Get distance from 0 to 1
|
|
||||||
"""
|
|
||||||
return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
|
|
||||||
|
|
||||||
def rename(self, node1, node2):
|
def rename(self, node1, node2):
|
||||||
"""Compares attributes of trees"""
|
"""Compares attributes of trees"""
|
||||||
if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan):
|
if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan):
|
||||||
|
@ -90,21 +68,10 @@ class CustomConfig_del_short(Config):
|
||||||
node1_content = ['####']
|
node1_content = ['####']
|
||||||
if len(node2_content) < 3:
|
if len(node2_content) < 3:
|
||||||
node2_content = ['####']
|
node2_content = ['####']
|
||||||
return self.normalized_distance(node1_content, node2_content)
|
return Levenshtein.normalized_distance(node1_content, node2_content)
|
||||||
return 0.
|
return 0.
|
||||||
|
|
||||||
class CustomConfig_del_block(Config):
|
class CustomConfig_del_block(Config):
|
||||||
@staticmethod
|
|
||||||
def maximum(*sequences):
|
|
||||||
"""Get maximum possible value
|
|
||||||
"""
|
|
||||||
return max(map(len, sequences))
|
|
||||||
|
|
||||||
def normalized_distance(self, *sequences):
|
|
||||||
"""Get distance from 0 to 1
|
|
||||||
"""
|
|
||||||
return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
|
|
||||||
|
|
||||||
def rename(self, node1, node2):
|
def rename(self, node1, node2):
|
||||||
"""Compares attributes of trees"""
|
"""Compares attributes of trees"""
|
||||||
if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan):
|
if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan):
|
||||||
|
@ -120,7 +87,7 @@ class CustomConfig_del_block(Config):
|
||||||
while ' ' in node2_content:
|
while ' ' in node2_content:
|
||||||
print(node2_content.index(' '))
|
print(node2_content.index(' '))
|
||||||
node2_content.pop(node2_content.index(' '))
|
node2_content.pop(node2_content.index(' '))
|
||||||
return self.normalized_distance(node1_content, node2_content)
|
return Levenshtein.normalized_distance(node1_content, node2_content)
|
||||||
return 0.
|
return 0.
|
||||||
|
|
||||||
class TEDS(object):
|
class TEDS(object):
|
||||||
|
|
|
@ -20,7 +20,7 @@ from shapely.geometry import Polygon
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
import operator
|
import operator
|
||||||
import Levenshtein
|
from rapidfuzz.distance import Levenshtein
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
import copy
|
import copy
|
||||||
|
|
|
@ -6,7 +6,7 @@ lmdb
|
||||||
tqdm
|
tqdm
|
||||||
numpy
|
numpy
|
||||||
visualdl
|
visualdl
|
||||||
python-Levenshtein
|
rapidfuzz
|
||||||
opencv-contrib-python==4.4.0.46
|
opencv-contrib-python==4.4.0.46
|
||||||
cython
|
cython
|
||||||
lxml
|
lxml
|
||||||
|
|
Loading…
Reference in New Issue