# Mirror of https://github.com/open-mmlab/mmocr.git (Python, 71 lines, 1.7 KiB)
# Base config files this config builds on (paths are relative to this file).
# NOTE(review): presumably merged via mmcv-style `_base_` inheritance; confirm
# against the config loader in use.
_base_ = [
    '../../_base_/schedules/schedule_adadelta_18e.py',
    '../../_base_/default_runtime.py'
]

# Category names handed to the label convertor via its `categories` argument.
categories = [
    'address', 'book', 'company', 'game', 'government', 'movie', 'name',
    'organization', 'position', 'scene'
]

# Annotation and vocabulary file locations, all under data/cluener2020/.
# The test annotations point at dev.json.
test_ann_file = 'data/cluener2020/dev.json'
train_ann_file = 'data/cluener2020/train.json'
vocab_file = 'data/cluener2020/vocab.txt'

# Maximum length value shared by the label convertor and the NerTransform
# pipeline step.
max_len = 128
# Annotation loader settings: a 'HardDiskLoader' whose 'LineJsonParser'
# is configured with the keys 'text' and 'label'.
loader = dict(
    type='HardDiskLoader',
    repeat=1,
    parser=dict(type='LineJsonParser', keys=['text', 'label']))

# Settings for the 'NerConvertor' label convertor. It reuses the module-level
# vocabulary path, category list and maximum length, with annotation_type
# set to 'bio'.
ner_convertor = dict(
    type='NerConvertor',
    annotation_type='bio',
    vocab_file=vocab_file,
    categories=categories,
    max_len=max_len)

# Test-time pipeline: 'NerTransform' (given the shared label convertor and
# max_len) followed by 'ToTensorNER'.
test_pipeline = [
    dict(type='NerTransform', label_convertor=ner_convertor, max_len=max_len),
    dict(type='ToTensorNER')
]

# Training pipeline: the same two steps as the test pipeline, i.e.
# 'NerTransform' (shared label convertor and max_len) then 'ToTensorNER'.
train_pipeline = [
    dict(type='NerTransform', label_convertor=ner_convertor, max_len=max_len),
    dict(type='ToTensorNER')
]
# Dataset class name used by both the train and test dataset settings.
dataset_type = 'NerDataset'

# Training dataset: train annotations, shared loader, training pipeline.
train = dict(
    type=dataset_type,
    ann_file=train_ann_file,
    loader=loader,
    pipeline=train_pipeline,
    test_mode=False)

# Test dataset: test annotations, shared loader, test pipeline.
test = dict(
    type=dataset_type,
    ann_file=test_ann_file,
    loader=loader,
    pipeline=test_pipeline,
    test_mode=True)

# Dataloader settings. Note that `val` and `test` both use the `test`
# dataset settings defined above.
data = dict(
    samples_per_gpu=8, workers_per_gpu=2, train=train, val=test, test=test)

# Evaluation settings: interval of 1 with the 'f1-score' metric.
evaluation = dict(interval=1, metric='f1-score')

# Model settings: a 'NerClassifier' composed of a 'BertEncoder', an
# 'FCDecoder' and a 'MaskedCrossEntropyLoss', sharing the label convertor
# settings used by the data pipelines.
model = dict(
    type='NerClassifier',
    encoder=dict(
        type='BertEncoder',
        max_position_embeddings=512,
        init_cfg=dict(
            type='Pretrained',
            # Adjacent string literals are concatenated into a single URL.
            checkpoint='https://download.openmmlab.com/mmocr/ner/'
            'bert_softmax/bert_pretrain.pth')),
    decoder=dict(type='FCDecoder'),
    loss=dict(type='MaskedCrossEntropyLoss'),
    label_convertor=ner_convertor)

# No test-time configuration is set in this file.
test_cfg = None