update codes and demos
parent
cc02608066
commit
08bd3c2e9b
|
@ -2,8 +2,6 @@ import argparse
|
|||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
|
@ -14,6 +12,8 @@ from groundingdino.util import box_ops
|
|||
from groundingdino.util.slconfig import SLConfig
|
||||
from groundingdino.util.utils import clean_state_dict, get_phrases_from_posmap
|
||||
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
|
||||
|
||||
def plot_boxes_to_image(image_pil, tgt):
|
||||
H, W = tgt["size"]
|
||||
|
@ -88,7 +88,6 @@ def get_grounding_output(model, image, caption, box_threshold, text_threshold, w
|
|||
logits = outputs["pred_logits"].cpu().sigmoid()[0] # (nq, 256)
|
||||
boxes = outputs["pred_boxes"].cpu()[0] # (nq, 4)
|
||||
logits.shape[0]
|
||||
|
||||
|
||||
# filter output
|
||||
logits_filt = logits.clone()
|
||||
|
@ -126,12 +125,8 @@ if __name__ == "__main__":
|
|||
"--output_dir", "-o", type=str, default="outputs", required=True, help="output directory"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--box_threshold", type=float, default=0.3, help="box threshold"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--text_threshold", type=float, default=0.25, help="text threshold"
|
||||
)
|
||||
parser.add_argument("--box_threshold", type=float, default=0.3, help="box threshold")
|
||||
parser.add_argument("--text_threshold", type=float, default=0.25, help="text threshold")
|
||||
args = parser.parse_args()
|
||||
|
||||
# cfg
|
||||
|
|
|
@ -1,4 +1,9 @@
|
|||
# ------------------------------------------------------------------------
|
||||
# Grounding DINO
|
||||
# url: https://github.com/IDEA-Research/GroundingDINO
|
||||
# Copyright (c) 2023 IDEA. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
# ------------------------------------------------------------------------
|
||||
# Conditional DETR
|
||||
# Copyright (c) 2021 Microsoft. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
|
|
|
@ -1,4 +1,9 @@
|
|||
# ------------------------------------------------------------------------
|
||||
# Grounding DINO
|
||||
# url: https://github.com/IDEA-Research/GroundingDINO
|
||||
# Copyright (c) 2023 IDEA. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
# ------------------------------------------------------------------------
|
||||
# Conditional DETR
|
||||
# Copyright (c) 2021 Microsoft. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
|
|
|
@ -1,4 +1,9 @@
|
|||
# ------------------------------------------------------------------------
|
||||
# Grounding DINO
|
||||
# url: https://github.com/IDEA-Research/GroundingDINO
|
||||
# Copyright (c) 2023 IDEA. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
# ------------------------------------------------------------------------
|
||||
# DINO
|
||||
# Copyright (c) 2022 IDEA. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
|
|
|
@ -1,4 +1,9 @@
|
|||
# ------------------------------------------------------------------------
|
||||
# Grounding DINO
|
||||
# url: https://github.com/IDEA-Research/GroundingDINO
|
||||
# Copyright (c) 2023 IDEA. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
# ------------------------------------------------------------------------
|
||||
# DINO
|
||||
# Copyright (c) 2022 IDEA. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
# ------------------------------------------------------------------------
|
||||
# Grounding DINO
|
||||
# url: https://github.com/IDEA-Research/GroundingDINO
|
||||
# Copyright (c) 2023 IDEA. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import torch.utils.checkpoint as checkpoint
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
# ------------------------------------------------------------------------
|
||||
# Grounding DINO
|
||||
# url: https://github.com/IDEA-Research/GroundingDINO
|
||||
# Copyright (c) 2023 IDEA. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# ------------------------------------------------------------------------
|
||||
# DINO
|
||||
# Copyright (c) 2022 IDEA. All Rights Reserved.
|
||||
# Grounding DINO
|
||||
# url: https://github.com/IDEA-Research/GroundingDINO
|
||||
# Copyright (c) 2023 IDEA. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
# ------------------------------------------------------------------------
|
||||
# Conditional DETR model and criterion classes.
|
||||
|
|
|
@ -1,18 +1,9 @@
|
|||
# coding=utf-8
|
||||
# Copyright 2022 The IDEA Authors. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ------------------------------------------------------------------------------------------------
|
||||
# ------------------------------------------------------------------------
|
||||
# Grounding DINO
|
||||
# url: https://github.com/IDEA-Research/GroundingDINO
|
||||
# Copyright (c) 2023 IDEA. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
# ------------------------------------------------------------------------
|
||||
# Deformable DETR
|
||||
# Copyright (c) 2020 SenseTime. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
|
@ -26,12 +17,14 @@
|
|||
import math
|
||||
import warnings
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.autograd import Function
|
||||
from torch.autograd.function import once_differentiable
|
||||
from torch.nn.init import constant_, xavier_uniform_
|
||||
|
||||
from groundingdino import _C
|
||||
|
||||
|
||||
|
@ -290,7 +283,6 @@ class MultiScaleDeformableAttention(nn.Module):
|
|||
|
||||
assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value
|
||||
|
||||
|
||||
value = self.value_proj(value)
|
||||
if key_padding_mask is not None:
|
||||
value = value.masked_fill(key_padding_mask[..., None], float(0))
|
||||
|
@ -339,7 +331,6 @@ class MultiScaleDeformableAttention(nn.Module):
|
|||
sampling_locations = sampling_locations.float()
|
||||
attention_weights = attention_weights.float()
|
||||
|
||||
|
||||
output = MultiScaleDeformableAttnFunction.apply(
|
||||
value,
|
||||
spatial_shapes,
|
||||
|
@ -416,4 +407,3 @@ def create_dummy_func(func, dependency, message=""):
|
|||
raise ImportError(err)
|
||||
|
||||
return _dummy
|
||||
|
||||
|
|
|
@ -1,4 +1,9 @@
|
|||
# ------------------------------------------------------------------------
|
||||
# Grounding DINO
|
||||
# url: https://github.com/IDEA-Research/GroundingDINO
|
||||
# Copyright (c) 2023 IDEA. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
# ------------------------------------------------------------------------
|
||||
# DINO
|
||||
# Copyright (c) 2022 IDEA. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
|
@ -744,7 +749,13 @@ class DeformableTransformerEncoderLayer(nn.Module):
|
|||
super().__init__()
|
||||
|
||||
# self attention
|
||||
self.self_attn = MSDeformAttn(embed_dim=d_model, num_levels=n_levels, num_heads=n_heads, num_points=n_points, batch_first=True)
|
||||
self.self_attn = MSDeformAttn(
|
||||
embed_dim=d_model,
|
||||
num_levels=n_levels,
|
||||
num_heads=n_heads,
|
||||
num_points=n_points,
|
||||
batch_first=True,
|
||||
)
|
||||
self.dropout1 = nn.Dropout(dropout)
|
||||
self.norm1 = nn.LayerNorm(d_model)
|
||||
|
||||
|
@ -804,7 +815,13 @@ class DeformableTransformerDecoderLayer(nn.Module):
|
|||
super().__init__()
|
||||
|
||||
# cross attention
|
||||
self.cross_attn = MSDeformAttn(embed_dim=d_model, num_levels=n_levels, num_heads=n_heads, num_points=n_points, batch_first=True)
|
||||
self.cross_attn = MSDeformAttn(
|
||||
embed_dim=d_model,
|
||||
num_levels=n_levels,
|
||||
num_heads=n_heads,
|
||||
num_points=n_points,
|
||||
batch_first=True,
|
||||
)
|
||||
self.dropout1 = nn.Dropout(dropout) if dropout > 0 else nn.Identity()
|
||||
self.norm1 = nn.LayerNorm(d_model)
|
||||
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
# ------------------------------------------------------------------------
|
||||
# Grounding DINO
|
||||
# url: https://github.com/IDEA-Research/GroundingDINO
|
||||
# Copyright (c) 2023 IDEA. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
# ------------------------------------------------------------------------
|
||||
# Copyright (c) Aishwarya Kamath & Nicolas Carion. Licensed under the Apache License 2.0. All Rights Reserved
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
"""
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# ------------------------------------------------------------------------
|
||||
# DINO
|
||||
# Copyright (c) 2022 IDEA. All Rights Reserved.
|
||||
# Grounding DINO
|
||||
# url: https://github.com/IDEA-Research/GroundingDINO
|
||||
# Copyright (c) 2023 IDEA. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# ------------------------------------------------------------------------
|
||||
# DINO
|
||||
# Copyright (c) 2022 IDEA. All Rights Reserved.
|
||||
# Grounding DINO
|
||||
# url: https://github.com/IDEA-Research/GroundingDINO
|
||||
# Copyright (c) 2023 IDEA. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
# ------------------------------------------------------------------------
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
# ------------------------------------------------------------------------
|
||||
# Grounding DINO
|
||||
# url: https://github.com/IDEA-Research/GroundingDINO
|
||||
# Copyright (c) 2023 IDEA. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
|
||||
# ------------------------------------------------------------------------
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Author: Yihao Chen
|
||||
# @Date: 2021-08-16 16:03:17
|
||||
|
|
|
@ -1 +1 @@
|
|||
__version__ = '0.1.0'
|
||||
__version__ = "0.1.0"
|
||||
|
|
Loading…
Reference in New Issue