From 1a35b04624adbe2d2d2d88fd0670550ed0a53c57 Mon Sep 17 00:00:00 2001 From: MaureenZOU Date: Wed, 4 Oct 2023 16:55:24 -0500 Subject: [PATCH] update requirements and script --- README.md | 29 +++++++++--------- assets/readmes/DATASET.md | 0 assets/readmes/EVAL.md | 0 assets/readmes/INSTALL.md | 0 assets/readmes/TRAIN.md | 0 assets/requirements/requirements.txt | 34 +++++++++++++++++++++ assets/requirements/requirements_custom.txt | 3 ++ demo/seem/app.py | 8 ++--- modeling/vision/encoder/__init__.py | 5 ++- requirements.txt | 31 ------------------- 10 files changed, 59 insertions(+), 51 deletions(-) create mode 100644 assets/readmes/DATASET.md create mode 100644 assets/readmes/EVAL.md create mode 100644 assets/readmes/INSTALL.md create mode 100644 assets/readmes/TRAIN.md create mode 100644 assets/requirements/requirements.txt create mode 100644 assets/requirements/requirements_custom.txt delete mode 100644 requirements.txt diff --git a/README.md b/README.md index 634943d..55777dd 100644 --- a/README.md +++ b/README.md @@ -6,32 +6,31 @@ We introduce **SEEM** that can **S**egment **E**verything **E**verywhere with ** **One-Line Demo with Linux:** ```sh -git clone git@github.com:UX-Decoder/Segment-Everything-Everywhere-All-At-Once.git && cd Segment-Everything-Everywhere-All-At-Once/demo_code && sh run_demo.sh +git clone git@github.com:UX-Decoder/Segment-Everything-Everywhere-All-At-Once.git && sh assets/scripts/run_demo.sh ``` **Getting Started:** -[INSTALL.md]()
-[DATASET.md]()
-[TRAIN.md]()
-[EVAL.md]() +[INSTALL.md](assets/readmes/INSTALL.md)
+[DATASET.md](assets/readmes/DATASET.md)
+[TRAIN.md](assets/readmes/TRAIN.md)
+[EVAL.md](assets/readmes/EVAL.md) :point_right: *[New]* **Latest Checkpoints and Numbers:** | | | | COCO | | | Ref-COCOg | | | VOC | | SBD | | |-----------------|---------------------------------------------------------------------------------------------|----------|------|------|------|-----------|------|------|-------|-------|-------|-------| | Method | Checkpoint | backbone | PQ ↑ | mAP ↑ | mIoU ↑ | cIoU ↑ | mIoU ↑ | AP50 ↑ | NoC85 ↓ | NoC90 ↓| NoC85 ↓| NoC90 ↓| -| X-Decoder | [ckpt]() | Focal-T | 50.8 | 39.5 | 62.4 | 57.6 | 63.2 | 71.6 | - | - | - | - | -| X-Decoder-oq201 | [ckpt]() | Focal-L | 56.5 | 46.7 | 67.2 | 62.8 | 67.5 | 76.3 | - | - | - | - | -| SEEM_v0 | [ckpt]() | Focal-T | 50.6 | 39.4 | 60.9 | 58.5 | 63.5 | 71.6 | 3.54 | 4.59 | * | * | +| X-Decoder | [ckpt](https://huggingface.co/xdecoder/X-Decoder/resolve/main/xdecoder_focalt_last.pt) | Focal-T | 50.8 | 39.5 | 62.4 | 57.6 | 63.2 | 71.6 | - | - | - | - | +| X-Decoder-oq201 | [ckpt](https://huggingface.co/xdecoder/X-Decoder/resolve/main/xdecoder_focall_last.pt) | Focal-L | 56.5 | 46.7 | 67.2 | 62.8 | 67.5 | 76.3 | - | - | - | - | +| SEEM_v0 | [ckpt](https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focalt_v0.pt) | Focal-T | 50.6 | 39.4 | 60.9 | 58.5 | 63.5 | 71.6 | 3.54 | 4.59 | * | * | | SEEM_v0 | - | Davit-d3 | 56.2 | 46.8 | 65.3 | 63.2 | 68.3 | 76.6 | 2.99 | 3.89 | 5.93 | 9.23 | -| SEEM_v0 | [ckpt]() | Focal-L | 56.2 | 46.4 | 65.5 | 62.8 | 67.7 | 76.2 | 3.04 | 3.85 | * | * | -| SEEM_v1 | [ckpt]() | Focal-T | 50.8 | 39.4 | 60.7 | 58.5 | 63.7 | 72.0 | 3.19 | 4.13 | * | * | -| SEEM_v1 | [ckpt]() | SAM-ViT-B | 52.0 | 43.5 | 60.2 | 54.1 | 62.2 | 69.3 | 2.53 | 3.23 | * | * | -| SEEM_v1 | [ckpt]() | SAM-ViT-L | 49.0 | 41.6 | 58.2 | 53.8 | 62.2 | 69.5 | 2.40 | 2.96 | * | * | - -SEEM_v0: Supporting Single Interactive object training and inference
-SEEM_v1: Supporting Multiple Interactive objects training and inference +| SEEM_v0 | [ckpt](https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focall_v0.pt) | Focal-L | 56.2 | 46.4 | 65.5 | 62.8 | 67.7 | 76.2 | 3.04 | 3.85 | * | * | +| SEEM_v1 | [ckpt](https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focalt_v1.pt) | Focal-T | 50.8 | 39.4 | 60.7 | 58.5 | 63.7 | 72.0 | 3.19 | 4.13 | * | * | +| SEEM_v1 | [ckpt](https://huggingface.co/xdecoder/SEEM/resolve/main/seem_samvitb_v1.pt) | SAM-ViT-B | 52.0 | 43.5 | 60.2 | 54.1 | 62.2 | 69.3 | 2.53 | 3.23 | * | * | +| SEEM_v1 | [ckpt](https://huggingface.co/xdecoder/SEEM/resolve/main/seem_samvitl_v1.pt) | SAM-ViT-L | 49.0 | 41.6 | 58.2 | 53.8 | 62.2 | 69.5 | 2.40 | 2.96 | * | * | +**SEEM_v0:** Supporting Single Interactive object training and inference
+**SEEM_v1:** Supporting Multiple Interactive objects training and inference :fire: **Related projects:** diff --git a/assets/readmes/DATASET.md b/assets/readmes/DATASET.md new file mode 100644 index 0000000..e69de29 diff --git a/assets/readmes/EVAL.md b/assets/readmes/EVAL.md new file mode 100644 index 0000000..e69de29 diff --git a/assets/readmes/INSTALL.md b/assets/readmes/INSTALL.md new file mode 100644 index 0000000..e69de29 diff --git a/assets/readmes/TRAIN.md b/assets/readmes/TRAIN.md new file mode 100644 index 0000000..e69de29 diff --git a/assets/requirements/requirements.txt b/assets/requirements/requirements.txt new file mode 100644 index 0000000..1ec8915 --- /dev/null +++ b/assets/requirements/requirements.txt @@ -0,0 +1,34 @@ +torch==2.1.0 +torchvision==0.16.0 +pillow==9.4.0 +opencv-python==4.8.1.78 +pyyaml==6.0.1 +json_tricks==3.17.3 +yacs==0.1.8 +scikit-learn==1.3.1 +pandas==2.0.3 +timm==0.4.12 +numpy==1.23.1 +einops==0.7.0 +fvcore==0.1.5.post20221221 +transformers==4.34.0 +sentencepiece==0.1.99 +ftfy==6.1.1 +regex==2023.10.3 +nltk==3.8.1 +mpi4py==3.1.5 +vision-datasets==0.2.2 +cython==3.0.2 +pycocotools==2.0.7 +diffdist==0.1 +pyarrow==13.0.0 +cityscapesscripts==2.2.2 +shapely==1.8.0 +scikit-image==0.21.0 +mup==1.0.0 +accelerate==0.23.0 +kornia==0.7.0 +deepspeed==0.10.3 +wandb==0.15.12 +infinibatch==0.1.1 +gradio==3.42.0 \ No newline at end of file diff --git a/assets/requirements/requirements_custom.txt b/assets/requirements/requirements_custom.txt new file mode 100644 index 0000000..d254bf2 --- /dev/null +++ b/assets/requirements/requirements_custom.txt @@ -0,0 +1,3 @@ +git+https://github.com/arogozhnikov/einops.git +git+https://github.com/MaureenZOU/detectron2-xyz.git +git+https://github.com/openai/whisper.git \ No newline at end of file diff --git a/demo/seem/app.py b/demo/seem/app.py index fb86b91..66d6621 100644 --- a/demo/seem/app.py +++ b/demo/seem/app.py @@ -42,14 +42,14 @@ opt = init_distributed(opt) # META DATA cur_model = 'None' if 'focalt' 
in cfg.conf_files: - pretrained_pth = os.path.join("seem_focalt_v2.pt") + pretrained_pth = os.path.join("seem_focalt_v0.pt") if not os.path.exists(pretrained_pth): - os.system("wget {}".format("https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focalt_v2.pt")) + os.system("wget {}".format("https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focalt_v0.pt")) cur_model = 'Focal-T' elif 'focal' in cfg.conf_files: - pretrained_pth = os.path.join("seem_focall_v1.pt") + pretrained_pth = os.path.join("seem_focall_v0.pt") if not os.path.exists(pretrained_pth): - os.system("wget {}".format("https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focall_v1.pt")) + os.system("wget {}".format("https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focall_v0.pt")) cur_model = 'Focal-L' ''' diff --git a/modeling/vision/encoder/__init__.py b/modeling/vision/encoder/__init__.py index 624fc1d..89af463 100755 --- a/modeling/vision/encoder/__init__.py +++ b/modeling/vision/encoder/__init__.py @@ -1,5 +1,8 @@ from .transformer_encoder_fpn import * -from .transformer_encoder_deform import * +try: + from .transformer_encoder_deform import * +except: + print('Deformable Transformer Encoder is not available.') from .build import * diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 2eac1ea..0000000 --- a/requirements.txt +++ /dev/null @@ -1,31 +0,0 @@ -pillow==9.4.0 -opencv-python -pyyaml -json_tricks -yacs -scikit-learn -pandas -timm==0.4.12 -numpy==1.23.1 -einops -fvcore -transformers -sentencepiece -ftfy -regex -nltk -mpi4py -vision-datasets==0.2.2 -cython -pycocotools -diffdist -pyarrow -cityscapesscripts -shapely==1.8.0 -scikit-image -mup -accelerate -kornia -deepspeed -wandb -infinibatch \ No newline at end of file