We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent c471b27, commit 616d924 (Copy full SHA for 616d924)
aten/src/ATen/native/cuda/Copy.cu
@@ -341,7 +341,7 @@ void direct_copy_kernel_cuda(TensorIteratorBase &iter) {
341
AT_DISPATCH_BIT_TYPES(dtype, "copy_", [&] {
342
gpu_kernel_nocast(iter, [] GPU_LAMBDA(scalar_t x) { return x; });
343
});
344
- } else if (is_permute_021(iter) && (dtype == kBFloat16 || dtype == kHalf)) {
+ } else if (is_permute_021(iter) && (dtype == kBFloat16 || dtype == kHalf) && !at::detail::getCUDAHooks().isGPUArch({"gfx1100"})) {
345
transpose_last2dim(iter);
346
} else {
347
AT_DISPATCH_V2(
0 commit comments