# Mirror of https://github.com/YifanXu74/MQ-Det.git (original file: 166 lines, 3.8 KiB, YAML)
# Model settings.
# NOTE(review): the nesting below was rebuilt from the value-less header keys
# and blank-row grouping of a flattened listing; keys and values are unchanged.
# Confirm the hierarchy against the consuming config schema.
# Tuple-like values such as (2, 2, 18, 2) are kept as plain scalars exactly as
# written; presumably the config loader evaluates them -- TODO confirm.
MODEL:
  META_ARCHITECTURE: "GeneralizedVLRCNN_New"
  WEIGHT: "MODEL/glip_large_model.pth"
  RPN_ONLY: True
  RPN_ARCHITECTURE: "VLDYHEAD"

  BACKBONE:
    CONV_BODY: "SWINT-FPN-RETINANET"
    OUT_CHANNELS: 256

  SWINT:
    EMBED_DIM: 192
    DEPTHS: (2, 2, 18, 2)
    NUM_HEADS: (6, 12, 24, 48)
    WINDOW_SIZE: 12
    # Distinct from BACKBONE.OUT_CHANNELS above; the two only stay separate
    # keys while this one is nested under SWINT.
    OUT_CHANNELS: (192, 384, 768, 1536)
    DROP_PATH_RATE: 0.4

  LANGUAGE_BACKBONE:
    FREEZE: False
    TOKENIZER_TYPE: "bert-base-uncased"
    MODEL_TYPE: "bert-base-uncased"  # "roberta-base", "clip"
    # Alternative values pointing at a path under MODEL/THIRD_PARTIES:
    # TOKENIZER_TYPE: "MODEL/THIRD_PARTIES/bert-base-uncased"
    # MODEL_TYPE: "MODEL/THIRD_PARTIES/bert-base-uncased"  # "roberta-base", "clip"
    MASK_SPECIAL: False

  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.125, 0.0625, 0.03125, 0.015625, 0.0078125)  # TODO: check
    POOLER_SAMPLING_RATIO: 0

  RPN:
    USE_FPN: True
    ANCHOR_SIZES: (64, 128, 256, 512, 1024)
    ANCHOR_STRIDE: (8, 16, 32, 64, 128)
    ASPECT_RATIOS: (1.0,)
    SCALES_PER_OCTAVE: 1

  DYHEAD:
    CHANNELS: 256
    NUM_CONVS: 8
    USE_GN: True
    USE_DYRELU: True
    USE_DFCONV: True
    USE_DYFUSE: True
    TOPK: 9  # topk for selecting candidate positive samples from each level
    SCORE_AGG: "MEAN"
    LOG_SCALE: 0.0

    # Alternative value, currently disabled:
    # USE_CHECKPOINT: True
    USE_CHECKPOINT: False
    FUSE_CONFIG:
      USE_FUSED_FEATURES_DOT_PRODUCT: True
      EARLY_FUSE_ON: True
      TYPE: "MHA-B"
      USE_CLASSIFICATION_LOSS: False
      USE_TOKEN_LOSS: False
      USE_CONTRASTIVE_ALIGN_LOSS: False
      CONTRASTIVE_HIDDEN_DIM: 64
      USE_DOT_PRODUCT_TOKEN_LOSS: True
      USE_LAYER_SCALE: True
      CLAMP_MIN_FOR_UNDERFLOW: True
      CLAMP_MAX_FOR_OVERFLOW: True
      CLAMP_BERTATTN_MIN_FOR_UNDERFLOW: True
      CLAMP_BERTATTN_MAX_FOR_OVERFLOW: True
      CLAMP_DOT_PRODUCT: True

# Evaluation settings.
# NOTE(review): nesting rebuilt from a flattened listing; values unchanged.
TEST:
  EVAL_TASK: 'detection'
  DURING_TRAINING: False
  IMS_PER_BATCH: 8

# Dataset selection and caption/prompt construction settings.
# NOTE(review): nesting rebuilt from a flattened listing; values unchanged.
# Confirm the hierarchy against the consuming config schema.
DATASETS:

  # One-element tuples naming the train / test splits. This TEST key is the
  # dataset list and must stay nested so it does not collide with the
  # top-level TEST section.
  TRAIN: ("object365_grounding_train", )
  TEST: ("coco_2017_val", )

  ONE_HOT: False
  # Per-source copy counts; trailing arithmetic is the original author's note.
  FLICKR_COPY: 8  # 0.15 * 8 = ~1.2M
  MIXED_COPY: 4  # 0.6 * 4 = ~2.4M
  OBJECT365_COPY: 2  # 1.4 * 2 = ~2.8M
  VG_COPY: 3  # 0.4 * 3 = ~1.2M
  IN_COPY: 2  # 0.67 * 2 = ~1.33M
  OI_COPY: 1  # 2M * 1 = 2M

  DISABLE_SHUFFLE: False
  ADD_DET_PROMPT: False
  RANDOM_SAMPLE_NEG: 85
  CONTROL_PROB: (0.0, 0.0, 0.5, 0.0)
  FURTHER_SCREEN: True
  CAPTION_CONF: 0.5
  CAPTION_NMS: -1.0
  CAPTION_MIN_BOX: 1

  # Quoted because the trailing space is part of the value.
  SEPARATION_TOKENS: ". "

  PACK_RANDOM_CAPTION_NUMBER: 20
  NO_RANDOM_PACK_PROBABILITY: 0.4
  RANDOM_PACK_PROB: 0.5
  CAPTION_FORMAT_VERSION: "v2"

  EXCLUDE_CROWD: True
  SPECIAL_SAFEGUARD_FOR_COCO_GROUNDING: True

# Input image normalisation constants and size limits.
# NOTE(review): nesting rebuilt from a flattened listing; values unchanged.
# Channel order of PIXEL_MEAN / PIXEL_STD is not stated here -- TODO confirm.
INPUT:
  PIXEL_MEAN: [103.530, 116.280, 123.675]
  PIXEL_STD: [57.375, 57.120, 58.395]
  MIN_SIZE_TRAIN: 800
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1333

# Augmentation settings.
# NOTE(review): nesting rebuilt from a flattened listing; value unchanged.
AUGMENT:
  # Candidate minimum sizes; the largest equals INPUT.MIN_SIZE_TRAIN (800).
  MULT_MIN_SIZE_TRAIN: (480,560,640,720,800)

# Data loading settings.
# NOTE(review): nesting rebuilt from a flattened listing; values unchanged.
DATALOADER:
  SIZE_DIVISIBILITY: 32
  # NOTE(review): 0 presumably means loading happens in the main process --
  # confirm against the data loader in use.
  NUM_WORKERS: 0

# Optimiser, schedule, checkpointing and gradient-clipping settings.
# NOTE(review): nesting rebuilt from a flattened listing; values unchanged.
# Confirm the hierarchy (CLIP_GRADIENTS inside SOLVER) against the consuming
# config schema.
SOLVER:
  OPTIMIZER: ADAMW
  BASE_LR: 0.0001
  # ---- should be modified during fine-tuning ----
  GATE_LR: 0.0025
  QUERY_LR: 0.00001
  # -----------------------------------------------
  LANG_LR: 0.00001
  WEIGHT_DECAY: 0.01
  WEIGHT_DECAY_SCHEDULE: True
  # Alternative schedule, currently disabled:
  # STEPS: (0.67, 0.89)
  STEPS: (0.95,)
  # Iteration-based limit, currently disabled in favour of MAX_EPOCH:
  # MAX_ITER: 1000000
  MAX_EPOCH: 1
  # Alternative batch size, currently disabled:
  # IMS_PER_BATCH: 64
  IMS_PER_BATCH: 8
  WARMUP_ITERS: 2000
  WARMUP_FACTOR: 0.001

  FIND_UNUSED_PARAMETERS: False

  USE_AMP: True
  # NOTE(review): the very large period together with CHECKPOINT_PER_EPOCH
  # suggests per-epoch checkpointing is the one in effect -- confirm.
  CHECKPOINT_PERIOD: 99999999
  CHECKPOINT_PER_EPOCH: 2.0
  TUNING_HIGHLEVEL_OVERRIDE: "vision_query"
  MAX_TO_KEEP: 4

  CLIP_GRADIENTS:
    ENABLED: True
    CLIP_TYPE: "full_model"
    CLIP_VALUE: 1.0
    NORM_TYPE: 2.0

# Vision-query settings.
# NOTE(review): nesting rebuilt from a flattened listing; values unchanged.
# This ENABLED key must stay nested so it does not collide with the
# gradient-clipping ENABLED key elsewhere in the file.
VISION_QUERY:
  ENABLED: True
  QUERY_BANK_PATH: 'MODEL/object365_query_5000_pool7_sel_large.pth'
  PURE_TEXT_RATE: 0.
  TEXT_DROPOUT: 0.4
  VISION_SCALE: 1.0
  NUM_QUERY_PER_CLASS: 5
  RANDOM_KSHOT: False
  ADD_ADAPT_LAYER: False
  CONDITION_GATE: True
  NONLINEAR_GATE: True
  NO_CAT: True
  QUERY_ADDITION_NAME: '_L'