add instructblip

techmonsterwang 2023-07-02 21:00:05 +08:00
parent f680f4ef30
commit 8a33a6aa2d
3 changed files with 1 addition and 25 deletions

View File

@@ -24,7 +24,7 @@ from mmpretrain import inference_model
 result = inference_model('instructblip-vicuna7b_3rdparty-zeroshot_caption', 'demo/cat-dog.png')
 print(result)
-# {'pred_caption': 'The image is a photograph of a beautiful garden. The garden is full of colorful flowers and green leaves.'}
+# {'pred_caption': 'a blanket next to each other in the grass\na cute puppy and kitten wallpapers'}
 ```
 <!-- [TABS-END] -->
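The hunk above updates the sample output in the README's inference tab. For context, a minimal sketch of exercising the same high-level API — `inference_model` and `list_models` are real mmpretrain helpers, though the exact model names returned depend on the metafiles shipped with the installed version:

```python
from mmpretrain import inference_model, list_models

# Discover registered InstructBLIP checkpoints via a wildcard pattern.
print(list_models('*instructblip*'))

# Zero-shot captioning with the checkpoint named in the diff above.
result = inference_model('instructblip-vicuna7b_3rdparty-zeroshot_caption',
                         'demo/cat-dog.png')
print(result['pred_caption'])
```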

View File

@@ -46,7 +46,6 @@ class InstructBlipCaption(BaseModel):
                  max_txt_len: int = 256,
                  end_sym: str = '\n',
                  num_captions: int = 1,
-                 generation_cfg: dict = dict(),
                  qformer_text_input=True,
                  data_preprocessor: Optional[dict] = None,
                  init_cfg: Optional[dict] = None) -> None:
@@ -121,19 +120,6 @@ class InstructBlipCaption(BaseModel):
         self.prompt_length = prompt_tokens.attention_mask.sum(1)
         self.qformer_text_input = qformer_text_input
-        # update generation configs
-        self.generation_cfg = dict(
-            max_new_tokens=300,
-            num_beams=1,
-            do_sample=True,
-            min_length=1,
-            top_p=0.9,
-            repetition_penalty=1.0,
-            length_penalty=1.0,
-            temperature=1.0,
-            **generation_cfg)
         if hasattr(self, 'register_load_state_dict_post_hook'):
             self.register_load_state_dict_post_hook(self._ignore_llm_keys_hook)
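The two hunks above drop the `generation_cfg` constructor argument together with the block that merged it into sampling defaults (the nearby `prompt_tokens.attention_mask.sum(1)` records each prompt's unpadded length from its attention mask). A minimal sketch of that defaults-plus-overrides pattern in isolation — `build_generation_cfg` is illustrative, not mmpretrain API. Note that the deleted `dict(max_new_tokens=300, ..., **generation_cfg)` form raises `TypeError` when the caller repeats a key, so the sketch merges with `{**defaults, **overrides}`, where later keys win:

```python
# Default sampling parameters, mirroring the deleted block above.
DEFAULTS = dict(
    max_new_tokens=300,
    num_beams=1,
    do_sample=True,
    min_length=1,
    top_p=0.9,
    repetition_penalty=1.0,
    length_penalty=1.0,
    temperature=1.0,
)


def build_generation_cfg(generation_cfg: dict) -> dict:
    """Merge user overrides into the defaults; later keys win."""
    return {**DEFAULTS, **generation_cfg}


cfg = build_generation_cfg(dict(num_beams=5, temperature=0.7))
assert cfg['num_beams'] == 5 and cfg['max_new_tokens'] == 300
```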
@@ -233,9 +219,6 @@ class InstructBlipCaption(BaseModel):
         attns_llama = torch.ones(
             inputs_llama.size()[:-1], dtype=torch.long).to(images.device)
-        # *******************************************************************?
         llama_tokens = self.llm_tokenizer(
             prompt,
             padding="longest",
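The truncated call above tokenizes a batch of prompts with the LLM tokenizer. A minimal sketch of the same `padding="longest"` behavior with a Hugging Face tokenizer — `AutoTokenizer` and the `padding`/`return_tensors` arguments are real `transformers` API, but the checkpoint name here is illustrative rather than the Vicuna tokenizer the model actually loads:

```python
from transformers import AutoTokenizer

# Illustrative checkpoint standing in for the model's own LLM tokenizer.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

prompts = ['Describe the image.',
           'What is shown in the picture? Answer in one sentence.']

# padding="longest" pads every prompt to the longest one in the batch;
# attention_mask marks real tokens (1) versus padding (0).
tokens = tokenizer(prompts, padding='longest', return_tensors='pt')

# Per-sample unpadded lengths: the same attention_mask.sum(1) trick used
# for self.prompt_length in the hunk further up.
print(tokens.input_ids.shape, tokens.attention_mask.sum(1))
```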

View File

@@ -2,11 +2,4 @@ from mmpretrain import inference_model
 result = inference_model('instructblip-vicuna7b_3rdparty-zeroshot_caption', 'demo/cat-dog.png')
 print(result)
 # {'pred_caption': 'This image shows a small dog and a kitten sitting on a blanket in a field of flowers. The dog is looking up at the kitten with a playful expression on its face. The background is a colorful striped blanket, and there are flowers all around them. The image is well composed with the two animals sitting in the center of the frame, surrounded by the flowers and blanket.'}
-# from mmpretrain import inference_model
-# result = inference_model('minigpt-4_vicuna-7b_caption', 'demo/cat-dog.png')
-# print(result)
-# {'pred_caption': 'This image shows a small dog and a kitten sitting on a blanket in a field of flowers. The dog is looking up at the kitten with a playful expression on its face. The background is a colorful striped blanket, and there are flowers all around them. The image is well composed with the two animals sitting in the center of the frame, surrounded by the flowers and blanket.'}