# MQ-Det/knowledge/run_inference_odinw_knowled...
#
# 259 lines
# 14 KiB
# YAML

# Free-text description of this experiment config.
# The only job template in this file runs tools/test_grounding_net.py on a
# single GPU (sku G1, sku_count 1) against ODinW task configs, so the text
# describes inference rather than multi-node pre-training.
description: ODinW knowledge-augmented inference experiments for vldyhead
# target:
# vc: hai1
# service: amlk8s
# name: itphyperdgx2cl1
# target:
# service: amlk8s
# vc: msrhyper
# cluster: itphyperdellcl1
# subscription_id: 46da6261-2167-4e71-8b0d-f4a45215ce61
# workspace_name: msrhyper
# resource_group: msrhyperVC
# Active compute target. The commented-out `target:` stanzas around this one
# are alternatives; only one `target:` key may be active at a time because
# duplicate mapping keys are invalid YAML.
target:
  vc: hcrr07
  service: amlk8s
  name: itphyperdgx2cl2
# target:
# vc: msrhyper # msrhyper
# service: amlk8s
# name: itphyperdellcl1
# target:
# vc: msrhyper
# service: amlk8s
# name: itpa100cl
# target:
# service: amlk8s
# vc: Image_Languge_Pretraining
# cluster: scus-v100-2
# subscription_id: 8d7c0a6f-7c2e-4e4b-becf-f9ee95433147
# workspace_name: Image_Languge_Pretraining-aml
# resource_group: Image_Languge_Pretraining-aml
# target:
# vc: hai6
# service: amlk8s
# name: itphyperdgx2cl1
# target:
# vc: hai2
# service: amlk8s
# name: itphyperdgx2cl1
# Container image used for the jobs.
environment:
  image: pengchuanzhang/pytorch:ubuntu20.04_torch1.9-cuda11.3-nccl2.9.9
  registry: docker.io
  # Alternative images (disabled):
  # image: amsword/setup:py36pt17u18cu11
  # image: pengchuanzhang/maskrcnn:ubuntu18-py3.7-cuda10.1-pytorch1.7
# Storage mounts. Each child key names one mount and maps it to a storage
# account + container. The job commands in this file reference several of
# them as /mnt/<key> (e.g. /mnt/odinw_data, /mnt/model_storage,
# /mnt/output_storage, /mnt/bingdata, /mnt/penzhanwu2data).
# Several keys intentionally point at the same account/container.
storage:
  data_storage:
    storage_account_name: penzhanwu2v2
    container_name: data
  model_storage:
    storage_account_name: penzhanwu2v2
    container_name: data
  project_storage:
    storage_account_name: penzhanwu2v2
    container_name: amulet
  detection_storage:
    storage_account_name: penzhanwu2
    container_name: phillytools
  penzhanwu2v2pt:
    storage_account_name: penzhanwu2v2
    container_name: phillytools
  penzhanwu2v2data:
    storage_account_name: penzhanwu2v2
    container_name: data
  penzhanwu2data:
    storage_account_name: penzhanwu2
    container_name: data
  # penzhanscusdata:
  #   storage_account_name: penzhanscus
  #   container_name: data
  bingdata:
    storage_account_name: penzhanwu2v2
    container_name: data
  alttextdata:
    storage_account_name: vlpdatasets
    container_name: data
  vlpdatasets_model:
    storage_account_name: vlpdatasets
    container_name: model
  odinw_data:
    storage_account_name: vlpdatasets
    container_name: odinw
  penzhanwu2amlt:
    storage_account_name: penzhanwu2
    container_name: amulet
  output_storage:
    storage_account_name: chunyleu
    container_name: output
code:
  # Local directory of the code; it is uploaded to the server.
  # $CONFIG_DIR is expanded to the directory of this config file, so the
  # value below points at that directory's parent.
  local_dir: $CONFIG_DIR/../
#############################################
# run ODinW inference (evaluation) experiments
#############################################
# Grid search (type: grid) over the entries in `params`. Each parameter name
# appears as a {placeholder} in `job_template` (job name and final command).
# With the values below the grid has 1 x 1 x 3 x 2 x 2 = 12 combinations,
# well under max_trials.
search:
  job_template:
    # NOTE(review): {model_dir} is an absolute /mnt/... path, so the expanded
    # job name contains slashes — confirm the submission tool accepts or
    # sanitizes that.
    name: vldyhead_{ckpt}_{knowledge_file}_{model_dir}_{task_name}_{knowledge_type}
    sku: G1
    sku_count: 1
    aml_mpirun:
      communicator: OpenMpi
      process_count_per_node: 1
    command:
      - ulimit -n 4096
      - pip install einops shapely timm yacs tensorboardX ftfy prettytable
      - pip install transformers
      - pip install qd
      - python setup.py build develop --user
      # Symlink the mounted storage into the names the code expects.
      # - ln -s /mnt/data_storage/htzhang DATASET
      - ln -s /mnt/odinw_data DATASET
      - ln -s /mnt/model_storage/htzhang/model/swin-transformer MODEL
      - ln -s /mnt/output_storage/exp_glip/model/knowledge/class_128_batchsize_32 OUTPUT
      - ln -s /mnt/bingdata/BingAltTextData BingAltTextData
      # - cp /mnt/penzhanwu2amlt/projects/swint_dyhead_ablation/amlt-results/7365119660.39362-37fbb6df-e4a6-432f-b9ea-feca56c444cd/model_0695000.pth ./ MODEL.WEIGHT model_0695000.pth
      - export TOKENIZERS_PARALLELISM=True
      # Evaluation command. Written as a folded scalar (>-): the lines below
      # are joined with single spaces into one command line, with no trailing
      # newline. Do not put comments inside it — they would become part of
      # the command.
      # Disabled extra override: GLIPKNOW.GPT3_NUM {gpt3_num}
      - >-
        python tools/test_grounding_net.py
        --config-file {model_dir}/config.yml
        --weight {model_dir}/model_{ckpt}.pth
        --task_config configs/odinw_workshop_fewshot/{task_name}.yaml
        TEST.EVAL_TASK detection
        TEST.IMS_PER_BATCH 1
        DATALOADER.DISTRIBUTE_CHUNK_AMONG_NODE False
        MODEL.DYHEAD.USE_CHECKPOINT False
        DATASETS.PREDOWNLOAD_BING False
        DATASETS.PREDOWNLOAD_WITH_AZCOPY False
        DATASETS.USE_OVERRIDE_CATEGORY True
        DATASETS.USE_CAPTION_PROMPT True
        GLIPKNOW.KNOWLEDGE_FILE knowledge/{knowledge_file}.yaml
        GLIPKNOW.KNOWLEDGE_TYPE {knowledge_type}
        OUTPUT_DIR {model_dir}/eval_benchmark35/{task_name}/{knowledge_type}
    submit_args:
      max_attempts: 5
      container_args:
        shm_size: 64G
  max_trials: 1000
  type: grid
  params:
    - name: ckpt
      spec: discrete
      # Keep checkpoint ids quoted: unquoted, the leading zero would make a
      # YAML parser read them as numbers and break model_{ckpt}.pth.
      # Alternatives (disabled):
      #   ['0050000','0100000','0150000','0200000','0250000','0300000','0350000']
      #   ['0367500']
      # Leftover notes: [vitb16_CLIP] [swin_t] [swin_t_CLIP]
      values:
        - '0350000'
    - name: knowledge_file
      spec: discrete
      # Alternatives (disabled): ['odinw_knowledge_with_prompt','odinw_knowledge']
      values:
        - 'odinw_benchmark35_knowledge_and_gpt3'
    - name: knowledge_type
      spec: discrete
      # Alternatives (disabled): ['gpt3']
      # Leftover note: ['odinw_knowledge_with_prompt','odinw_knowledge']
      values:
        - 'def_wiki'
        - 'path_wn3'
        - 'def_wn'
    - name: model_dir
      spec: discrete
      # Alternatives (disabled):
      #   [/mnt/penzhanwu2data/xiaowh/glip_for_od/glip-a_def_wiki_maxcls365_laggfirst]
      # Leftover notes: [5,20,50] / [5,20,50,-1] / [5,20,50], [-1], [5,20,50,-1]
      # Config URL: https://penzhanwu2.blob.core.windows.net/data/xiaowh/glip_for_od/glip-a_def_wiki_maxcls365_laggfirst/config.yml
      values:
        - /mnt/penzhanwu2data/xiaowh/glip_for_od/glip-a_def_wiki_maxcls365_laggfirst
        - /mnt/penzhanwu2data/xiaowh/glip_for_od/glip-a_maxcls365_laggfirst
    - name: task_name
      spec: discrete
      # Each value selects configs/odinw_workshop_fewshot/<task_name>.yaml.
      values:
        - 'boggleBoards_416x416AutoOrient_export_'
        - 'ChessPieces_Chess_Pieces.v23-raw.coco'
# ['AerialMaritimeDrone_large','AerialMaritimeDrone_tiled','AmericanSignLanguageLetters_American_Sign_Language_Letters.v1-v1.coco','Aquarium_Aquarium_Combined.v2-raw-1024.coco','BCCD_BCCD.v3-raw.coco','ChessPieces_Chess_Pieces.v23-raw.coco','CottontailRabbits','DroneControl_Drone_Control.v3-raw.coco','EgoHands_generic','EgoHands_specific','HardHatWorkers_raw','MaskWearing_raw','MountainDewCommercial','NorthAmericaMushrooms_North_American_Mushrooms.v1-416x416.coco','OxfordPets_by-breed','OxfordPets_by-species','PKLot_640','Packages_Raw','PascalVOC','Raccoon_Raccoon.v2-raw.coco','ShellfishOpenImages_raw','ThermalCheetah','UnoCards_raw','VehiclesOpenImages_416x416','WildfireSmoke','boggleBoards_416x416AutoOrient_export_','brackishUnderwater_960x540','dice_mediumColor_export','openPoetryVision_512x512','pistols_export','plantdoc_416x416','pothole','selfdrivingCar_fixedLarge_export_','thermalDogsAndPeople','websiteScreenshots']
# ['configs/odinw_workshop_fewshot/AerialMaritimeDrone_large.yaml','configs/odinw_workshop_fewshot/AerialMaritimeDrone_tiled.yaml','configs/odinw_workshop_fewshot/AmericanSignLanguageLetters_American_Sign_Language_Letters.v1-v1.coco.yaml','configs/odinw_workshop_fewshot/Aquarium_Aquarium_Combined.v2-raw-1024.coco.yaml','configs/odinw_workshop_fewshot/BCCD_BCCD.v3-raw.coco.yaml','configs/odinw_workshop_fewshot/ChessPieces_Chess_Pieces.v23-raw.coco.yaml','configs/odinw_workshop_fewshot/CottontailRabbits.yaml','configs/odinw_workshop_fewshot/DroneControl_Drone_Control.v3-raw.coco.yaml','configs/odinw_workshop_fewshot/EgoHands_generic.yaml','configs/odinw_workshop_fewshot/EgoHands_specific.yaml','configs/odinw_workshop_fewshot/HardHatWorkers_raw.yaml','configs/odinw_workshop_fewshot/MaskWearing_raw.yaml','configs/odinw_workshop_fewshot/MountainDewCommercial.yaml','configs/odinw_workshop_fewshot/NorthAmericaMushrooms_North_American_Mushrooms.v1-416x416.coco.yaml','configs/odinw_workshop_fewshot/OxfordPets_by-breed.yaml','configs/odinw_workshop_fewshot/OxfordPets_by-species.yaml','configs/odinw_workshop_fewshot/PKLot_640.yaml','configs/odinw_workshop_fewshot/Packages_Raw.yaml','configs/odinw_workshop_fewshot/PascalVOC.yaml','configs/odinw_workshop_fewshot/Raccoon_Raccoon.v2-raw.coco.yaml','configs/odinw_workshop_fewshot/ShellfishOpenImages_raw.yaml','configs/odinw_workshop_fewshot/ThermalCheetah.yaml','configs/odinw_workshop_fewshot/UnoCards_raw.yaml','configs/odinw_workshop_fewshot/VehiclesOpenImages_416x416.yaml','configs/odinw_workshop_fewshot/WildfireSmoke.yaml','configs/odinw_workshop_fewshot/boggleBoards_416x416AutoOrient_export_.yaml','configs/odinw_workshop_fewshot/brackishUnderwater_960x540.yaml','configs/odinw_workshop_fewshot/dice_mediumColor_export.yaml','configs/odinw_workshop_fewshot/openPoetryVision_512x512.yaml','configs/odinw_workshop_fewshot/pistols_export.yaml','configs/odinw_workshop_fewshot/plantdoc_416x416.yaml','configs/odinw_workshop_fewshot/pothole.yaml','configs/odinw_workshop_fewshot/selfdrivingCar_fixedLarge_export_.yaml','configs/odinw_workshop_fewshot/thermalDogsAndPeople.yaml','configs/odinw_workshop_fewshot/websiteScreenshots.yaml']
# --task_config configs/odinw_caption/Pascal2012.yaml,configs/odinw_caption/AerialMaritimeDrone_large_test.yaml,configs/odinw_caption/AerialMaritimeDrone_large.yaml,configs/odinw_caption/AerialMaritimeDrone_tiled_test.yaml,configs/odinw_caption/AerialMaritimeDrone_tiled.yaml,configs/odinw_caption/Aquarium_Aquarium_Combined.v2-raw-1024.coco_test.yaml,configs/odinw_caption/Aquarium_Aquarium_Combined.v2-raw-1024.coco.yaml,configs/odinw_caption/CottontailRabbits_test.yaml,configs/odinw_caption/CottontailRabbits.yaml,configs/odinw_caption/EgoHands_generic_test.yaml,configs/odinw_caption/EgoHands_generic.yaml,configs/odinw_caption/NorthAmericaMushrooms_North_American_Mushrooms.v1-416x416.coco_test.yaml,configs/odinw_caption/NorthAmericaMushrooms_North_American_Mushrooms.v1-416x416.coco.yaml,configs/odinw_caption/Packages_Raw_test.yaml,configs/odinw_caption/Packages_Raw.yaml,configs/odinw_caption/Raccoon_Raccoon.v2-raw.coco_test.yaml,configs/odinw_caption/Raccoon_Raccoon.v2-raw.coco.yaml,configs/odinw_caption/ShellfishOpenImages_raw_test.yaml,configs/odinw_caption/ShellfishOpenImages_raw.yaml,configs/odinw_caption/VehiclesOpenImages_416x416_test.yaml,configs/odinw_caption/VehiclesOpenImages_416x416.yaml,configs/odinw_caption/pistols_export_test.yaml,configs/odinw_caption/pistols_export.yaml,configs/odinw_caption/pothole_test.yaml,configs/odinw_caption/pothole.yaml,configs/odinw_caption/thermalDogsAndPeople_test.yaml,configs/odinw_caption/thermalDogsAndPeople.yaml
# [odinw_knowledge.yaml OR odinw_knowledge_with_prompt.yaml]
# jobs:
# - name: _bidirectional #vldyhead_swint_s_Obj365_GoldGround_FusionSingle_LangOn_E30_Bsz64_HCRR
# sku: 2xG16
# aml_mpirun:
# communicator: OpenMpi
# process_count_per_node: 1
# command:
# - ulimit -n 4096
# - pip install einops shapely timm yacs tensorboardX ftfy prettytable
# - pip install transformers
# - python setup.py build develop --user
# - ln -s /mnt/data_storage/htzhang DATASET
# - ln -s /mnt/model_storage/htzhang/model/swin-transformer MODEL
# - ln -s /mnt/model_storage/htzhang/model/dyhead OUTPUT
# - ln -s /mnt/bingdata/BingAltTextData BingAltTextData
# - python3 -m onedet.utils.launch --nnodes 2 --nproc_per_node=16 tools/train_net.py
# --config-file configs/harold/dyhead/grounding.yaml
# --skip-test
# --use-tensorboard
# --save_original_config
# AUGMENT.MULT_MIN_SIZE_TRAIN 480,560,640,720,800
# INPUT.MAX_SIZE_TRAIN 1333
# MODEL.BACKBONE.FREEZE_CONV_BODY_AT -1
# SOLVER.IMS_PER_BATCH 64
# SOLVER.USE_AMP True
# TEST.DURING_TRAINING False
# TEST.IMS_PER_BATCH 32
# SOLVER.MODEL_EMA 0.999
# MODEL.LANGUAGE_BACKBONE.FREEZE False
# MODEL.DYHEAD.FUSE_CONFIG.EARLY_FUSE_ON True
# MODEL.DYHEAD.FUSE_CONFIG.TYPE "MHA-B"
# SOLVER.CLIP_GRADIENTS.ENABLED True
# MODEL.DYHEAD.FUSE_CONFIG.CLAMP_MIN_FOR_UNDERFLOW True
# MODEL.DYHEAD.FUSE_CONFIG.CLAMP_MAX_FOR_OVERFLOW True
# MODEL.DYHEAD.FUSE_CONFIG.CLAMP_BERTATTN_MIN_FOR_UNDERFLOW True
# MODEL.DYHEAD.FUSE_CONFIG.CLAMP_BERTATTN_MAX_FOR_OVERFLOW True
# MODEL.DYHEAD.FUSE_CONFIG.SEPARATE_BIDIRECTIONAL True
# SOLVER.FIND_UNUSED_PARAMETERS True
# MODEL.DYHEAD.USE_CHECKPOINT True
# # MODEL.LANGUAGE_BACKBONE.N_LAYERS 4
# submit_args:
# max_attempts: 5
# container_args:
# shm_size: 64G
# jobs:
# - name: _ # debug
# sku: G16
# aml_mpirun:
# communicator: OpenMpi
# process_count_per_node: 1
# command:
# - ulimit -n 4096
# - pip install einops shapely timm yacs tensorboardX ftfy
# - pip install transformers
# - python setup.py build develop --user
# - ln -s /mnt/data_storage/htzhang DATASET
# - ln -s /mnt/model_storage/htzhang/model/swin-transformer MODEL
# - ln -s /mnt/model_storage/htzhang/model/dyhead OUTPUT
# - CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m onedet.utils.launch --nnodes 1 --nproc_per_node=8 tools/train_net.py
# --config-file configs/harold/dyhead/grounding.yaml
# --skip-test
# --use-tensorboard
# --save_original_config
# AUGMENT.MULT_MIN_SIZE_TRAIN 480,560,640,720,800
# INPUT.MAX_SIZE_TRAIN 1333
# MODEL.BACKBONE.FREEZE_CONV_BODY_AT -1
# SOLVER.IMS_PER_BATCH 8
# SOLVER.USE_AMP True
# TEST.DURING_TRAINING False
# TEST.IMS_PER_BATCH 32
# SOLVER.MODEL_EMA 0.999
# SOLVER.MAX_EPOCH 30
# MODEL.LANGUAGE_BACKBONE.FREEZE False
# MODEL.DYHEAD.FUSE_CONFIG.EARLY_FUSE_ON True
# MODEL.DYHEAD.FUSE_CONFIG.USE_DOT_PRODUCT_TOKEN_LOSS True
# MODEL.DYHEAD.FUSE_CONFIG.USE_LAYER_SCALE True
# SOLVER.CLIP_GRADIENTS.ENABLED True
# MODEL.DYHEAD.FUSE_CONFIG.TYPE "MHA-B"
# MODEL.DYHEAD.FUSE_CONFIG.SEPARATE_BIDIRECTIONAL True
# # MODEL.DYHEAD.USE_CHECKPOINT True
# # MODEL.DYHEAD.FUSE_CONFIG.STABLE_SOFTMAX_2D True
# # MODEL.DYHEAD.USE_CHECKPOINT True
# # MODEL.DYHEAD.FUSE_CONFIG.SEPARATE_BIDIRECTIONAL True
# submit_args:
# max_attempts: 5
# container_args:
# shm_size: 64G