From 4cca568bd8ab1e0f7179c5124144e746b0adbfd6 Mon Sep 17 00:00:00 2001
From: Feraidoon Mehri <36224762+NightMachinery@users.noreply.github.com>
Date: Fri, 19 Jul 2024 15:12:04 +0330
Subject: [PATCH] eva.py: fixed bug in applying attention mask

The mask should be applied before the softmax.
---
 timm/models/eva.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/timm/models/eva.py b/timm/models/eva.py
index 7a1b67e1..f31fd08f 100644
--- a/timm/models/eva.py
+++ b/timm/models/eva.py
@@ -134,10 +134,12 @@ class EvaAttention(nn.Module):
         else:
             q = q * self.scale
             attn = (q @ k.transpose(-2, -1))
-            attn = attn.softmax(dim=-1)
+
             if attn_mask is not None:
                 attn_mask = attn_mask.to(torch.bool)
                 attn = attn.masked_fill(~attn_mask[:, None, None, :], float("-inf"))
+            attn = attn.softmax(dim=-1)
+
             attn = self.attn_drop(attn)
             x = attn @ v
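
For reference, a minimal standalone sketch of the corrected ordering. The tensor shapes, values, and the boolean key-padding mask below are illustrative assumptions, not taken from timm; only the masked_fill-before-softmax pattern mirrors the patched code.

import torch

# Minimal sketch (assumed shapes): B=2 sequences, H=1 head, N=4 tokens, D=8 head dim.
B, H, N, D = 2, 1, 4, 8
q = torch.randn(B, H, N, D)
k = torch.randn(B, H, N, D)
v = torch.randn(B, H, N, D)
scale = D ** -0.5

# Boolean key-padding mask, shape (B, N): True = real token, False = padding.
attn_mask = torch.tensor([[True, True, True, False],
                          [True, True, False, False]])

attn = (q * scale) @ k.transpose(-2, -1)  # raw scores, shape (B, H, N, N)

# Apply the mask BEFORE the softmax: padded keys get a score of -inf, so the
# softmax assigns them exactly zero weight and each row still sums to 1 over
# the valid keys. Applying the same masked_fill after the softmax (the old
# behavior) would instead inject -inf into already-normalized weights and
# propagate inf/NaN into the output.
attn = attn.masked_fill(~attn_mask[:, None, None, :], float("-inf"))
attn = attn.softmax(dim=-1)

x = attn @ v  # shape (B, H, N, D)

# With the corrected order, the weight on the first sequence's padded key (index 3) is exactly zero.
assert attn[0, :, :, 3].sum() == 0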