mirror of https://github.com/open-mmlab/mmocr.git
[Fix] Being more conservative on Dataset Preparer (#1520)
* [Fix] Being more conservative on Dataset Preparer * update pull/1440/head
parent
b65b65e8f8
commit
8737675445
|
@ -1,15 +1,17 @@
|
|||
ic15_det_data_root = 'data/icdar2015'
|
||||
ic15_det_data_root = 'data/det/icdar2015'
|
||||
|
||||
ic15_det_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=ic15_det_data_root,
|
||||
ann_file='textdet_train.json',
|
||||
ann_file='instances_training.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
filter_cfg=dict(filter_empty_gt=True, min_size=32),
|
||||
pipeline=None)
|
||||
|
||||
ic15_det_test = dict(
|
||||
type='OCRDataset',
|
||||
data_root=ic15_det_data_root,
|
||||
ann_file='textdet_test.json',
|
||||
ann_file='instances_test.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
test_mode=True,
|
||||
pipeline=None)
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
ic15_rec_data_root = 'data/icdar2015/'
|
||||
ic15_rec_data_root = 'data/rec/icdar_2015/'
|
||||
|
||||
ic15_rec_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=ic15_rec_data_root,
|
||||
ann_file='textrecog_train.json',
|
||||
ann_file='train_labels.json',
|
||||
test_mode=False,
|
||||
pipeline=None)
|
||||
|
||||
ic15_rec_test = dict(
|
||||
type='OCRDataset',
|
||||
data_root=ic15_rec_data_root,
|
||||
ann_file='textrecog_test.json',
|
||||
ann_file='test_labels.json',
|
||||
test_mode=True,
|
||||
pipeline=None)
|
||||
|
|
|
@ -1,4 +1,9 @@
|
|||
# Dataset Preparer
|
||||
# Dataset Preparer (Beta)
|
||||
|
||||
```{note}
|
||||
Dataset Preparer is still in beta and might not be stable enough. You
|
||||
are welcome to try it out and report any issues to us.
|
||||
```
|
||||
|
||||
## One-click data preparation script
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# Text Detection\[Deprecated\]
|
||||
# Text Detection
|
||||
|
||||
```{warning}
|
||||
```{note}
|
||||
This page is deprecated and all these scripts will eventually be migrated into the dataset preparer, a brand new module designed to ease these lengthy dataset preparation steps. [Check it out](./dataset_preparer.md)!
|
||||
```
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# Key Information Extraction\[Deprecated\]
|
||||
# Key Information Extraction
|
||||
|
||||
```{warning}
|
||||
```{note}
|
||||
This page is deprecated and all these scripts will eventually be migrated into the dataset preparer, a brand new module designed to ease these lengthy dataset preparation steps. [Check it out](./dataset_preparer.md)!
|
||||
```
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# Text Recognition\[Deprecated\]
|
||||
# Text Recognition
|
||||
|
||||
```{warning}
|
||||
```{note}
|
||||
This page is deprecated and all these scripts will eventually be migrated into the dataset preparer, a brand new module designed to ease these lengthy dataset preparation steps. [Check it out](./dataset_preparer.md)!
|
||||
```
|
||||
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
# 数据准备
|
||||
# 数据准备 (Beta)
|
||||
|
||||
```{note}
|
||||
Dataset Preparer 目前仍处在公测阶段,欢迎尝鲜试用!如遇到任何问题,请及时向我们反馈。
|
||||
```
|
||||
|
||||
## 一键式数据准备脚本
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# 文字检测\[过时\]
|
||||
# 文字检测
|
||||
|
||||
```{warning}
|
||||
```{note}
|
||||
该页面内容已经过时,所有有关数据格式转换相关的脚本都将最终迁移至数据准备器 **dataset preparer**,这个全新设计的模块能够极大地方便用户完成冗长的数据准备步骤,详见[相关文档](./dataset_preparer.md)。
|
||||
```
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# 关键信息提取\[过时\]
|
||||
# 关键信息提取
|
||||
|
||||
```{warning}
|
||||
```{note}
|
||||
该页面内容已经过时,所有有关数据格式转换相关的脚本都将最终迁移至数据准备器 **dataset preparer**,这个全新设计的模块能够极大地方便用户完成冗长的数据准备步骤,详见[相关文档](./dataset_preparer.md)。
|
||||
```
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# 文字识别\[过时\]
|
||||
# 文字识别
|
||||
|
||||
```{warning}
|
||||
```{note}
|
||||
该页面内容已经过时,所有有关数据格式转换相关的脚本都将最终迁移至数据准备器 **dataset preparer**,这个全新设计的模块能够极大地方便用户完成冗长的数据准备步骤,详见[相关文档](./dataset_preparer.md)。
|
||||
```
|
||||
|
||||
|
|
|
@ -119,14 +119,21 @@ class BaseDataConverter:
|
|||
return
|
||||
cfg_path = osp.join(self.config_path, self.task, '_base_', 'datasets',
|
||||
f'{self.dataset_name}.py')
|
||||
if not osp.exists(cfg_path):
|
||||
with open(cfg_path, 'w') as f:
|
||||
f.write(
|
||||
f'{self.dataset_name}_{self.task}_data_root = \'{self.data_root}\'\n' # noqa: E501
|
||||
)
|
||||
for split in self.splits:
|
||||
with open(cfg_path, 'a') as f:
|
||||
f.write(dataset_config[split])
|
||||
if osp.exists(cfg_path):
|
||||
while True:
|
||||
c = input(f'{cfg_path} already exists, overwrite? (Y/n) ') \
|
||||
or 'Y'
|
||||
if c.lower() == 'y':
|
||||
break
|
||||
if c.lower() == 'n':
|
||||
return
|
||||
with open(cfg_path, 'w') as f:
|
||||
f.write(
|
||||
f'{self.dataset_name}_{self.task}_data_root = \'{self.data_root}\'\n' # noqa: E501
|
||||
)
|
||||
for split in self.splits:
|
||||
with open(cfg_path, 'a') as f:
|
||||
f.write(dataset_config[split])
|
||||
|
||||
@abstractmethod
|
||||
def pack_instance(self, sample: Tuple, split: str) -> Dict:
|
||||
|
|
Loading…
Reference in New Issue