Merge pull request #2236 from NightMachinery/patch-1

eva.py: fixed bug in applying attention mask
Ross Wightman committed 2024-07-19 08:09:56 -07:00
commit 474c9cf768

@@ -134,10 +134,12 @@ class EvaAttention(nn.Module):
         else:
             q = q * self.scale
             attn = (q @ k.transpose(-2, -1))
-            attn = attn.softmax(dim=-1)
             if attn_mask is not None:
                 attn_mask = attn_mask.to(torch.bool)
                 attn = attn.masked_fill(~attn_mask[:, None, None, :], float("-inf"))
+            attn = attn.softmax(dim=-1)
             attn = self.attn_drop(attn)
             x = attn @ v
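
The fix moves the softmax to after the mask is applied. Masking must precede softmax: filling scores with -inf before normalization gives masked positions exactly zero weight, whereas filling an already-normalized distribution with -inf leaves invalid rows that propagate -inf/NaN through dropout and the attn @ v product. A minimal standalone sketch of the difference (plain PyTorch; the toy shapes and mask values are hypothetical, this is not the timm module itself):

import torch

# Toy shapes: batch B, heads H, sequence length N (hypothetical values).
B, H, N = 1, 1, 4
attn = torch.randn(B, H, N, N)                           # raw scores, i.e. (q @ k^T) * scale
attn_mask = torch.tensor([[True, True, False, False]])   # True = attend, False = mask out
attn_mask = attn_mask.to(torch.bool)

# Buggy order (pre-fix): softmax first, then masked_fill(-inf).
buggy = attn.softmax(dim=-1)
buggy = buggy.masked_fill(~attn_mask[:, None, None, :], float("-inf"))
# Rows now hold -inf instead of probabilities; attn_drop and attn @ v
# propagate -inf/NaN, and the surviving weights no longer sum to 1.

# Fixed order (post-fix): masked_fill(-inf) first, then softmax.
fixed = attn.masked_fill(~attn_mask[:, None, None, :], float("-inf"))
fixed = fixed.softmax(dim=-1)
# Masked positions get exactly zero weight; each row is a valid
# probability distribution over the unmasked positions.

print(buggy[0, 0, 0])  # e.g. tensor([0.31, 0.52, -inf, -inf]) -> invalid weights
print(fixed[0, 0, 0])  # e.g. tensor([0.63, 0.37, 0.00, 0.00]) -> valid distribution

Applying the boolean mask as a -inf fill before the softmax also matches how F.scaled_dot_product_attention treats a boolean attn_mask on the fused path, so both branches of the module now behave consistently.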