Merge pull request #1233 from jjsjann123/nhwc_cond_conv2d

Fix channels_last input handling in cond_conv2d; update nvfuser debug env variable names (PYTORCH_CUDA_FUSER_* -> PYTORCH_NVFUSER_*)
2025-06-03 15:01:08 +08:00 · 2022-04-25 20:42:03 -07:00 · 2022-04-25 20:42:03 -07:00 · 001688dabf
commit 001688dabf
parent 7d235c5a5f f88c606fcf
2 changed files with 5 additions and 4 deletions
--- a/timm/models/layers/cond_conv2d.py
+++ b/timm/models/layers/cond_conv2d.py
@@ -91,7 +91,8 @@ class CondConv2d(nn.Module):
            bias = torch.matmul(routing_weights, self.bias)
            bias = bias.view(B * self.out_channels)
        # move batch elements with channels so each batch element can be efficiently convolved with separate kernel
-        x = x.view(1, B * C, H, W)
+        # reshape instead of view to work with channels_last input
+        x = x.reshape(1, B * C, H, W)
        if self.dynamic_padding:
            out = conv2d_same(
                x, weight, bias, stride=self.stride, padding=self.padding,
--- a/timm/utils/jit.py
+++ b/timm/utils/jit.py
@@ -34,9 +34,9 @@ def set_jit_fuser(fuser):
        torch._C._jit_override_can_fuse_on_gpu(True)
        torch._C._jit_set_texpr_fuser_enabled(False)
    elif fuser == "nvfuser" or fuser == "nvf":
-        os.environ['PYTORCH_CUDA_FUSER_DISABLE_FALLBACK'] = '1'
-        os.environ['PYTORCH_CUDA_FUSER_DISABLE_FMA'] = '1'
-        os.environ['PYTORCH_CUDA_FUSER_JIT_OPT_LEVEL'] = '0'
+        os.environ['PYTORCH_NVFUSER_DISABLE_FALLBACK'] = '1'
+        os.environ['PYTORCH_NVFUSER_DISABLE_FMA'] = '1'
+        os.environ['PYTORCH_NVFUSER_JIT_OPT_LEVEL'] = '0'
        torch._C._jit_set_texpr_fuser_enabled(False)
        torch._C._jit_set_profiling_executor(True)
        torch._C._jit_set_profiling_mode(True)