update requirements and script
parent
946b8460d7
commit
1a35b04624
29
README.md
29
README.md
|
@ -6,32 +6,31 @@ We introduce **SEEM** that can **S**egment **E**verything **E**verywhere with **
|
|||
|
||||
**One-Line Demo with Linux:**
|
||||
```sh
|
||||
git clone git@github.com:UX-Decoder/Segment-Everything-Everywhere-All-At-Once.git && cd Segment-Everything-Everywhere-All-At-Once/demo_code && sh run_demo.sh
|
||||
git clone git@github.com:UX-Decoder/Segment-Everything-Everywhere-All-At-Once.git && cd Segment-Everything-Everywhere-All-At-Once && sh assets/scripts/run_demo.sh
|
||||
```
|
||||
|
||||
**Getting Started:**
|
||||
|
||||
[INSTALL.md]() <br>
|
||||
[DATASET.md]() <br>
|
||||
[TRAIN.md]() <br>
|
||||
[EVAL.md]()
|
||||
[INSTALL.md](assets/readmes/INSTALL.md) <br>
|
||||
[DATASET.md](assets/readmes/DATASET.md) <br>
|
||||
[TRAIN.md](assets/readmes/TRAIN.md) <br>
|
||||
[EVAL.md](assets/readmes/EVAL.md)
|
||||
|
||||
:point_right: *[New]* **Latest Checkpoints and Numbers:**
|
||||
| | | | COCO | | | Ref-COCOg | | | VOC | | SBD | |
|
||||
|-----------------|---------------------------------------------------------------------------------------------|----------|------|------|------|-----------|------|------|-------|-------|-------|-------|
|
||||
| Method | Checkpoint | backbone | PQ ↑ | mAP ↑ | mIoU ↑ | cIoU ↑ | mIoU ↑ | AP50 ↑ | NoC85 ↓ | NoC90 ↓| NoC85 ↓| NoC90 ↓|
|
||||
| X-Decoder | [ckpt]() | Focal-T | 50.8 | 39.5 | 62.4 | 57.6 | 63.2 | 71.6 | - | - | - | - |
|
||||
| X-Decoder-oq201 | [ckpt]() | Focal-L | 56.5 | 46.7 | 67.2 | 62.8 | 67.5 | 76.3 | - | - | - | - |
|
||||
| SEEM_v0 | [ckpt]() | Focal-T | 50.6 | 39.4 | 60.9 | 58.5 | 63.5 | 71.6 | 3.54 | 4.59 | * | * |
|
||||
| X-Decoder | [ckpt](https://huggingface.co/xdecoder/X-Decoder/resolve/main/xdecoder_focalt_last.pt) | Focal-T | 50.8 | 39.5 | 62.4 | 57.6 | 63.2 | 71.6 | - | - | - | - |
|
||||
| X-Decoder-oq201 | [ckpt](https://huggingface.co/xdecoder/X-Decoder/resolve/main/xdecoder_focall_last.pt) | Focal-L | 56.5 | 46.7 | 67.2 | 62.8 | 67.5 | 76.3 | - | - | - | - |
|
||||
| SEEM_v0 | [ckpt](https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focalt_v0.pt) | Focal-T | 50.6 | 39.4 | 60.9 | 58.5 | 63.5 | 71.6 | 3.54 | 4.59 | * | * |
|
||||
| SEEM_v0 | - | Davit-d3 | 56.2 | 46.8 | 65.3 | 63.2 | 68.3 | 76.6 | 2.99 | 3.89 | 5.93 | 9.23 |
|
||||
| SEEM_v0 | [ckpt]() | Focal-L | 56.2 | 46.4 | 65.5 | 62.8 | 67.7 | 76.2 | 3.04 | 3.85 | * | * |
|
||||
| SEEM_v1 | [ckpt]() | Focal-T | 50.8 | 39.4 | 60.7 | 58.5 | 63.7 | 72.0 | 3.19 | 4.13 | * | * |
|
||||
| SEEM_v1 | [ckpt]() | SAM-ViT-B | 52.0 | 43.5 | 60.2 | 54.1 | 62.2 | 69.3 | 2.53 | 3.23 | * | * |
|
||||
| SEEM_v1 | [ckpt]() | SAM-ViT-L | 49.0 | 41.6 | 58.2 | 53.8 | 62.2 | 69.5 | 2.40 | 2.96 | * | * |
|
||||
|
||||
SEEM_v0: Supporting Single Interactive object training and inference <br>
|
||||
SEEM_v1: Supporting Multiple Interactive objects training and inference
|
||||
| SEEM_v0 | [ckpt](https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focall_v0.pt) | Focal-L | 56.2 | 46.4 | 65.5 | 62.8 | 67.7 | 76.2 | 3.04 | 3.85 | * | * |
|
||||
| SEEM_v1 | [ckpt](https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focalt_v1.pt) | Focal-T | 50.8 | 39.4 | 60.7 | 58.5 | 63.7 | 72.0 | 3.19 | 4.13 | * | * |
|
||||
| SEEM_v1 | [ckpt](https://huggingface.co/xdecoder/SEEM/resolve/main/seem_samvitb_v1.pt) | SAM-ViT-B | 52.0 | 43.5 | 60.2 | 54.1 | 62.2 | 69.3 | 2.53 | 3.23 | * | * |
|
||||
| SEEM_v1 | [ckpt](https://huggingface.co/xdecoder/SEEM/resolve/main/seem_samvitl_v1.pt) | SAM-ViT-L | 49.0 | 41.6 | 58.2 | 53.8 | 62.2 | 69.5 | 2.40 | 2.96 | * | * |
|
||||
|
||||
**SEEM_v0:** Supports training and inference with a single interactive object <br>
|
||||
**SEEM_v1:** Supports training and inference with multiple interactive objects
|
||||
|
||||
:fire: **Related projects:**
|
||||
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
torch==2.1.0
|
||||
torchvision==0.16.0
|
||||
pillow==9.4.0
|
||||
opencv-python==4.8.1.78
|
||||
pyyaml==6.0.1
|
||||
json_tricks==3.17.3
|
||||
yacs==0.1.8
|
||||
scikit-learn==1.3.1
|
||||
pandas==2.0.3
|
||||
timm==0.4.12
|
||||
numpy==1.23.1
|
||||
einops==0.7.0
|
||||
fvcore==0.1.5.post20221221
|
||||
transformers==4.34.0
|
||||
sentencepiece==0.1.99
|
||||
ftfy==6.1.1
|
||||
regex==2023.10.3
|
||||
nltk==3.8.1
|
||||
mpi4py==3.1.5
|
||||
vision-datasets==0.2.2
|
||||
cython==3.0.2
|
||||
pycocotools==2.0.7
|
||||
diffdist==0.1
|
||||
pyarrow==13.0.0
|
||||
cityscapesscripts==2.2.2
|
||||
shapely==1.8.0
|
||||
scikit-image==0.21.0
|
||||
mup==1.0.0
|
||||
accelerate==0.23.0
|
||||
kornia==0.7.0
|
||||
deepspeed==0.10.3
|
||||
wandb==0.15.12
|
||||
infinibatch==0.1.1
|
||||
gradio==3.42.0
|
|
@ -0,0 +1,3 @@
|
|||
git+https://github.com/arogozhnikov/einops.git
|
||||
git+https://github.com/MaureenZOU/detectron2-xyz.git
|
||||
git+https://github.com/openai/whisper.git
|
|
@ -42,14 +42,14 @@ opt = init_distributed(opt)
|
|||
# META DATA
|
||||
cur_model = 'None'
|
||||
if 'focalt' in cfg.conf_files:
|
||||
pretrained_pth = os.path.join("seem_focalt_v2.pt")
|
||||
pretrained_pth = os.path.join("seem_focalt_v0.pt")
|
||||
if not os.path.exists(pretrained_pth):
|
||||
os.system("wget {}".format("https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focalt_v2.pt"))
|
||||
os.system("wget {}".format("https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focalt_v0.pt"))
|
||||
cur_model = 'Focal-T'
|
||||
elif 'focal' in cfg.conf_files:
|
||||
pretrained_pth = os.path.join("seem_focall_v1.pt")
|
||||
pretrained_pth = os.path.join("seem_focall_v0.pt")
|
||||
if not os.path.exists(pretrained_pth):
|
||||
os.system("wget {}".format("https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focall_v1.pt"))
|
||||
os.system("wget {}".format("https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focall_v0.pt"))
|
||||
cur_model = 'Focal-L'
|
||||
|
||||
'''
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
from .transformer_encoder_fpn import *
|
||||
from .transformer_encoder_deform import *
|
||||
try:
|
||||
from .transformer_encoder_deform import *
|
||||
except:
|
||||
print('Deformable Transformer Encoder is not available.')
|
||||
from .build import *
|
||||
|
||||
|
||||
|
|
|
@ -1,31 +0,0 @@
|
|||
pillow==9.4.0
|
||||
opencv-python
|
||||
pyyaml
|
||||
json_tricks
|
||||
yacs
|
||||
scikit-learn
|
||||
pandas
|
||||
timm==0.4.12
|
||||
numpy==1.23.1
|
||||
einops
|
||||
fvcore
|
||||
transformers
|
||||
sentencepiece
|
||||
ftfy
|
||||
regex
|
||||
nltk
|
||||
mpi4py
|
||||
vision-datasets==0.2.2
|
||||
cython
|
||||
pycocotools
|
||||
diffdist
|
||||
pyarrow
|
||||
cityscapesscripts
|
||||
shapely==1.8.0
|
||||
scikit-image
|
||||
mup
|
||||
accelerate
|
||||
kornia
|
||||
deepspeed
|
||||
wandb
|
||||
infinibatch
|
Loading…
Reference in New Issue