From 4cca568bd8ab1e0f7179c5124144e746b0adbfd6 Mon Sep 17 00:00:00 2001
From: Feraidoon Mehri <36224762+NightMachinery@users.noreply.github.com>
Date: Fri, 19 Jul 2024 15:12:04 +0330
Subject: [PATCH] eva.py: fixed bug in applying attention mask

The mask should be applied before the softmax.
---
 timm/models/eva.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/timm/models/eva.py b/timm/models/eva.py
index 7a1b67e1..f31fd08f 100644
--- a/timm/models/eva.py
+++ b/timm/models/eva.py
@@ -134,10 +134,12 @@ class EvaAttention(nn.Module):
         else:
             q = q * self.scale
             attn = (q @ k.transpose(-2, -1))
-            attn = attn.softmax(dim=-1)
+
             if attn_mask is not None:
                 attn_mask = attn_mask.to(torch.bool)
                 attn = attn.masked_fill(~attn_mask[:, None, None, :], float("-inf"))
+            attn = attn.softmax(dim=-1)
+
             attn = self.attn_drop(attn)
             x = attn @ v
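
For reference, a minimal standalone sketch of the corrected ordering. The tensor shapes, values, and the boolean key-padding mask below are illustrative assumptions, not taken from timm; only the masked_fill-before-softmax pattern mirrors the patched code.

import torch

# Minimal sketch (assumed shapes): B=2 sequences, H=1 head, N=4 tokens, D=8 head dim.
B, H, N, D = 2, 1, 4, 8
q = torch.randn(B, H, N, D)
k = torch.randn(B, H, N, D)
v = torch.randn(B, H, N, D)
scale = D ** -0.5

# Boolean key-padding mask, shape (B, N): True = real token, False = padding.
attn_mask = torch.tensor([[True, True, True, False],
                          [True, True, False, False]])

attn = (q * scale) @ k.transpose(-2, -1)  # raw scores, shape (B, H, N, N)

# Apply the mask BEFORE the softmax: padded keys get a score of -inf, so the
# softmax assigns them exactly zero weight and each row still sums to 1 over
# the valid keys. Applying the same masked_fill after the softmax (the old
# behavior) would instead inject -inf into already-normalized weights and
# propagate inf/NaN into the output.
attn = attn.masked_fill(~attn_mask[:, None, None, :], float("-inf"))
attn = attn.softmax(dim=-1)

x = attn @ v  # shape (B, H, N, D)

# With the corrected order, the weight on the first sequence's padded key (index 3) is exactly zero.
assert attn[0, :, :, 3].sum() == 0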