MQ-Det/configs/pretrain/mq-groundingdino-t.yaml

114 lines
2.4 KiB
YAML

# Model section: initial checkpoint plus backbone, language-backbone and
# ROI box-head settings.
# NOTE(review): the nesting below is reconstructed from key order (the source
# had lost its indentation) -- confirm against the project's config defaults.
MODEL:
  # Checkpoint file, given as a relative path.
  WEIGHT: "MODEL/groundingdino_swint_ogc.pth"
  BACKBONE:
    OUT_CHANNELS: 256
  LANGUAGE_BACKBONE:
    FREEZE: False
    # Tokenizer and model are both set to the same "bert-base-uncased" id.
    TOKENIZER_TYPE: "bert-base-uncased"
    MODEL_TYPE: "bert-base-uncased"  # "roberta-base", "clip"
    # Alternative kept from the original: the same pair given as paths under
    # MODEL/THIRD_PARTIES (presumably a local copy -- verify before enabling).
    # TOKENIZER_TYPE: "MODEL/THIRD_PARTIES/bert-base-uncased"
    # MODEL_TYPE: "MODEL/THIRD_PARTIES/bert-base-uncased"  # "roberta-base", "clip"
    MASK_SPECIAL: False
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    # Four scales: 1/8, 1/16, 1/32 and 1/64. Left as an unquoted plain scalar
    # exactly as in the original; tuple conversion is up to the config loader.
    POOLER_SCALES: (0.125, 0.0625, 0.03125, 0.015625)
    POOLER_SAMPLING_RATIO: 0
# Evaluation settings. The three keys below belong to this section; without
# indentation they were parsed as unrelated top-level keys.
TEST:
  EVAL_TASK: 'detection'
  DURING_TRAINING: False
  # Evaluation batch size (the SOLVER section carries its own IMS_PER_BATCH).
  IMS_PER_BATCH: 8
# use for grounding model
# Dataset selection and prompt-construction options.
DATASETS:
  # Single-element tuples; the trailing comma is part of the original value.
  TRAIN: ("object365_grounding_train", )
  # Nested here, this is the evaluation dataset list and no longer collides
  # with the top-level TEST section.
  TEST: ("coco_2017_val", )
  DISABLE_SHUFFLE: False
  ADD_DET_PROMPT: False
  RANDOM_SAMPLE_NEG: 85
  # Alternative value kept from the original:
  # RANDOM_SAMPLE_NEG: 365
  # Four-element tuple; only the third entry is non-zero.
  CONTROL_PROB: (0.0, 0.0, 0.5, 0.0)
  # Quoted because the value ends in a significant space.
  SEPARATION_TOKENS: ". "
  EXCLUDE_CROWD: True
  SPECIAL_SAFEGUARD_FOR_COCO_GROUNDING: True
  SEP_AT_LAST: True
  ADD_NORMED_CXCY: True
# Image input settings: channel format, normalisation statistics and the
# train/test size bounds.
INPUT:
  FORMAT: 'rgb'
  # Per-channel mean / std in [0, 1] scale. The values match the commonly
  # used ImageNet RGB statistics -- NOTE(review): confirm the loader scales
  # pixels to [0, 1] before normalising.
  PIXEL_MEAN: [0.485, 0.456, 0.406]
  PIXEL_STD: [0.229, 0.224, 0.225]
  # Train and test use the same 800 / 1333 size bounds.
  MIN_SIZE_TRAIN: 800
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1333
# Augmentation settings.
AUGMENT:
  # Candidate training min-sizes, 480 to 800 in steps of 80. Left as an
  # unquoted plain scalar exactly as in the original.
  MULT_MIN_SIZE_TRAIN: (480,560,640,720,800)
# Data-loading settings.
DATALOADER:
  SIZE_DIVISIBILITY: 32
  # 0 workers -- presumably loads data in the main process; verify against
  # the data loader construction code.
  NUM_WORKERS: 0
# Optimisation settings: optimizer, per-group learning rates, schedule,
# checkpointing and gradient clipping.
# NOTE(review): CLIP_GRADIENTS is nested under SOLVER based on key order (the
# source had lost its indentation) -- confirm against the config defaults.
SOLVER:
  OPTIMIZER: ADAMW
  BASE_LR: 0.0001
  #### should be modified during fine-tuning #######
  GATE_LR: 0.005
  QUERY_LR: 0.00001
  #################################################
  LANG_LR: 0.00001
  WEIGHT_DECAY: 0.0001
  # Alternative schedule kept from the original:
  # STEPS: (0.67, 0.89)
  # Fractional value in a one-element tuple -- presumably a fraction of the
  # total schedule; verify against the LR scheduler.
  STEPS: (0.95,)
  # MAX_EPOCH: 10
  MAX_EPOCH: 1
  # Training batch size (the TEST section carries its own IMS_PER_BATCH).
  IMS_PER_BATCH: 16
  WARMUP_ITERS: 2000
  WARMUP_FACTOR: 0.001
  USE_AMP: True
  MODEL_EMA: 0.999
  FIND_UNUSED_PARAMETERS: False
  # Very large period alongside CHECKPOINT_PER_EPOCH -- presumably disables
  # iteration-based checkpoints in favour of the per-epoch setting; confirm.
  CHECKPOINT_PERIOD: 99999999
  CHECKPOINT_PER_EPOCH: 2.0
  # TUNING_HIGHLEVEL_OVERRIDE: "vision_query"
  TUNING_HIGHLEVEL_OVERRIDE: "vision_query"
  MAX_TO_KEEP: 4
  CLIP_GRADIENTS:
    # Nested here, ENABLED no longer collides with VISION_QUERY's ENABLED.
    ENABLED: True
    CLIP_TYPE: "full_model"
    CLIP_VALUE: 1.0
    # CLIP_VALUE: 0.1
    NORM_TYPE: 2.0
# Vision-query settings: query bank location plus the text/vision mixing and
# gating switches.
VISION_QUERY:
  ENABLED: True
  # Query bank file, given as a relative path.
  QUERY_BANK_PATH: 'MODEL/object365_query_5000_pool7_sel_gd.pth'
  # "0." is a float zero; kept in the original spelling.
  PURE_TEXT_RATE: 0.
  TEXT_DROPOUT: 0.4
  VISION_SCALE: 1.0
  NUM_QUERY_PER_CLASS: 5
  RANDOM_KSHOT: False
  ADD_ADAPT_LAYER: False
  CONDITION_GATE: True
  NONLINEAR_GATE: True
  NO_CAT: True
  # Name suffix string -- presumably appended to query-related names; verify
  # where QUERY_ADDITION_NAME is read.
  QUERY_ADDITION_NAME: '_groundingdino-T'
# GroundingDINO-specific settings. Unlike the other sections, the keys here
# are lower-case; they are kept as written because key names are
# case-sensitive.
# NOTE(review): this section may continue beyond the lines shown here.
GROUNDINGDINO:
  enabled: True
  use_checkpoint: False
  use_transformer_ckpt: False
  # Same identifier as MODEL.LANGUAGE_BACKBONE.MODEL_TYPE in this file.
  text_encoder_type: 'bert-base-uncased'
  box_threshold: 0.05