intel · srividya-sundaram · Apr 22, 2025 · Apr 22, 2025 · Apr 23, 2025 · Apr 23, 2025
@@ -5159,19 +5159,6 @@ class OffloadingActionBuilder final {
         : DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL, OAB),
           SYCLInstallation(C.getDriver()) {}
 
-    void withBoundArchForToolChain(const ToolChain *TC,
-                                   llvm::function_ref<void(const char *)> Op) {
-      for (auto &A : GpuArchList) {
-        if (TC->getTriple() == A.first) {
-          Op(A.second ? Args.MakeArgString(A.second) : nullptr);
-          return;
-        }
-      }
-
-      // no bound arch for this toolchain
-      Op(nullptr);
-    }
-
     void pushForeignAction(Action *A) override {
       // Accept a foreign action from the CudaActionBuilder for compiling CUDA
       // sources
@@ -5454,10 +5441,13 @@ class OffloadingActionBuilder final {
         return;
 
       OffloadAction::DeviceDependences Dep;
-      withBoundArchForToolChain(ToolChains.front(), [&](const char *BoundArch) {
-        Dep.add(*SYCLLinkBinary, *ToolChains.front(), BoundArch,
-                Action::OFK_SYCL);
-      });
+      for (auto &TripleAndArchPair : GpuArchList) {
+        if (ToolChains.front()->getTriple() == TripleAndArchPair.first) {
+          Dep.add(*SYCLLinkBinary, *ToolChains.front(),
+                  TripleAndArchPair.second, Action::OFK_SYCL);
+        }
+      }
+
       AL.push_back(C.MakeAction<OffloadAction>(Dep, SYCLLinkBinary->getType()));
       SYCLLinkBinary = nullptr;
     }
@@ -5901,9 +5891,7 @@ class OffloadingActionBuilder final {
           }
           if (SkipWrapper) {
             // Wrapper step not requested.
-            withBoundArchForToolChain(TC, [&](const char *BoundArch) {
-              addDeps(WrapperInputs.front(), TC, BoundArch);
-            });
+            addDeps(WrapperInputs.front(), TC, BoundArch);
             continue;
           }
 
@@ -5917,9 +5905,7 @@ class OffloadingActionBuilder final {
                  BoundArch != nullptr);
             addDeps(DeviceWrappingAction, TC, AddBA ? BoundArch : nullptr);
           } else {
-            withBoundArchForToolChain(TC, [&](const char *BoundArch) {
-              addDeps(DeviceWrappingAction, TC, BoundArch);
-            });
+            addDeps(DeviceWrappingAction, TC, BoundArch);
           }
         }
       }
@@ -6440,14 +6426,20 @@ class OffloadingActionBuilder final {
               SYCLTargetInfoList.emplace_back(*TCIt, nullptr);
             } else {
               const char *OffloadArch = nullptr;
-              for (auto &A : GpuArchList) {
-                if (TT == A.first) {
-                  OffloadArch = A.second;
-                  break;
+              for (auto &TargetTripleArchPair : GpuArchList) {
+                if (TT == TargetTripleArchPair.first) {
+                  OffloadArch = TargetTripleArchPair.second;
+                  // Add an arch to the SYCLTargetInfoList
+                  // only if it is not already present in the list.
+                  auto Arch = llvm::find_if(
+                      SYCLTargetInfoList, [&](auto &DeviceTargetInfo) {
+                        return OffloadArch == DeviceTargetInfo.BoundArch;
+                      });
+
+                  if (Arch == SYCLTargetInfoList.end())
+                    SYCLTargetInfoList.emplace_back(*TCIt, OffloadArch);
                 }
               }
-              assert(OffloadArch && "Failed to find matching arch.");
-              SYCLTargetInfoList.emplace_back(*TCIt, OffloadArch);
             }
           }
         }

@@ -211,3 +211,15 @@
 // RUN: %clangxx -fsycl -### --offload-new-driver %s 2>&1 \
 // RUN:  | FileCheck -check-prefix CHECK_NO_DYNAMIC_LINKING %s
 // CHECK_NO_DYNAMIC_LINKING-NOT: clang-linker-wrapper{{.*}} "-sycl-allow-device-image-dependencies"
+
+// Check that -fsycl-targets correctly processes multiple NVIDIA
+// and AMD GPU targets.
+// RUN:   %clang -### -fsycl -fsycl-targets=nvidia_gpu_sm_60,nvidia_gpu_sm_70 -nocudalib --offload-new-driver  %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=CHK-MACRO-SM-60,CHK-MACRO-SM-70 %s
+// CHK-MACRO-SM-60: clang{{.*}} "-fsycl-is-device"{{.*}} "-D__SYCL_TARGET_NVIDIA_GPU_SM_60__"{{.*}}
+// CHK-MACRO-SM-70: clang{{.*}} "-fsycl-is-device"{{.*}} "-D__SYCL_TARGET_NVIDIA_GPU_SM_70__"{{.*}}
+// RUN:   %clang -### -fsycl -fsycl-targets=amd_gpu_gfx90a,amd_gpu_gfx90c -fno-sycl-libspirv -nogpulib --offload-new-driver  %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=CHK-MACRO-GFX90A,CHK-MACRO-GFX90C %s
+// CHK-MACRO-GFX90A: clang{{.*}} "-fsycl-is-device"{{.*}} "-D__SYCL_TARGET_AMD_GPU_GFX90A__"{{.*}}
+// CHK-MACRO-GFX90C: clang{{.*}} "-fsycl-is-device"{{.*}} "-D__SYCL_TARGET_AMD_GPU_GFX90C__"{{.*}}
+
@@ -861,3 +861,15 @@
 // FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK: --dependent-lib=sycl{{[0-9]*}}-previewd
 // FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}.lib
 // FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}-preview.lib
+
+// Check that -fsycl-targets correctly processes multiple NVIDIA
+// and AMD GPU targets.
+// RUN:   %clang -### -fsycl -fsycl-targets=nvidia_gpu_sm_60,nvidia_gpu_sm_70 -nocudalib --no-offload-new-driver  %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=CHK-MACRO-SM-60,CHK-MACRO-SM-70 %s
+// CHK-MACRO-SM-60: clang{{.*}} "-fsycl-is-device"{{.*}} "-D__SYCL_TARGET_NVIDIA_GPU_SM_60__"{{.*}}
+// CHK-MACRO-SM-70: clang{{.*}} "-fsycl-is-device"{{.*}} "-D__SYCL_TARGET_NVIDIA_GPU_SM_70__"{{.*}}
+// RUN:   %clang -### -fsycl -fsycl-targets=amd_gpu_gfx90a,amd_gpu_gfx90c -fno-sycl-libspirv -nogpulib --no-offload-new-driver  %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=CHK-MACRO-GFX90A,CHK-MACRO-GFX90C %s
+// CHK-MACRO-GFX90A: clang{{.*}} "-fsycl-is-device"{{.*}} "-D__SYCL_TARGET_AMD_GPU_GFX90A__"{{.*}}
+// CHK-MACRO-GFX90C: clang{{.*}} "-fsycl-is-device"{{.*}} "-D__SYCL_TARGET_AMD_GPU_GFX90C__"{{.*}}
+
@@ -45,8 +45,8 @@ and not recommended to use in production environment.
     currently overrides all the other specified SYCL targets when enabled.)
 
     Special target values specific to Intel, NVIDIA and AMD Processor Graphics
-    support are accepted, providing a streamlined interface for AOT. Only one of
-    these values at a time is supported.
+    support are accepted, providing a streamlined interface for AOT.
+    A comma-separated list of valid Intel, NVIDIA and AMD Processor Graphics values is supported.
     * intel_gpu_ptl_u, intel_gpu_30_1_1 - Panther Lake U Intel graphics architecture
     * intel_gpu_ptl_h, intel_gpu_30_0_4 - Panther Lake H Intel graphics architecture
     * intel_gpu_lnl_m, intel_gpu_20_4_4 - Lunar Lake Intel graphics architecture

@@ -1,7 +1,7 @@
 // REQUIRES: hip
-// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx906 %s -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SAFE
-// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx906 %s -mllvm --amdgpu-oclc-unsafe-int-atomics=true -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
-// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx90a %s -mllvm --amdgpu-oclc-unsafe-fp-atomics=true  -mllvm --amdgpu-oclc-unsafe-int-atomics=true -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE-FP
+// RUN: %clangxx -fsycl -fsycl-targets=amdgcn-amd-amdhsa %s -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SAFE
+// RUN: %clangxx -fsycl -fsycl-targets=amdgcn-amd-amdhsa %s -mllvm --amdgpu-oclc-unsafe-int-atomics=true -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
+// RUN: %clangxx -fsycl -fsycl-targets=amdgcn-amd-amdhsa %s -mllvm --amdgpu-oclc-unsafe-fp-atomics=true  -mllvm --amdgpu-oclc-unsafe-int-atomics=true -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE-FP
 
 #include <sycl/sycl.hpp>
 

@@ -1,5 +1,5 @@
 // REQUIRES: hip
-// RUN: %clangxx -fsycl-device-only -fsycl-targets=amd_gpu_gfx90a -S -Xclang -emit-llvm %s -o -| FileCheck %s
+// RUN: %clangxx -fsycl-device-only -fsycl-targets=amdgcn-amd-amdhsa -S -Xclang -emit-llvm %s -o -| FileCheck %s
 
 #include <sycl/sycl.hpp>
 

@@ -1,5 +1,5 @@
 // REQUIRES: hip
-// RUN: %clangxx -fsycl-device-only -fsycl-targets=amd_gpu_gfx90a -S -Xclang -emit-llvm %s -o -| FileCheck %s
+// RUN: %clangxx -fsycl-device-only -fsycl-targets=amdgcn-amd-amdhsa -S -Xclang -emit-llvm %s -o -| FileCheck %s
 
 #include <sycl/sycl.hpp>
 

@@ -1,5 +1,5 @@
 // REQUIRES: hip
-// RUN: %clangxx -fsycl-device-only -fsycl-targets=amd_gpu_gfx90a -S -Xclang -emit-llvm %s -o -| FileCheck %s
+// RUN: %clangxx -fsycl-device-only -fsycl-targets=amdgcn-amd-amdhsa -S -Xclang -emit-llvm %s -o -| FileCheck %s
 
 #include <sycl/sycl.hpp>
 

@@ -1,5 +1,5 @@
 // REQUIRES: hip
-// RUN: %clangxx -fsycl-device-only -fsycl-targets=amd_gpu_gfx90a -S -Xclang -emit-llvm %s -o -| FileCheck %s
+// RUN: %clangxx -fsycl-device-only -fsycl-targets=amdgcn-amd-amdhsa -S -Xclang -emit-llvm %s -o -| FileCheck %s
 
 #include <sycl/sycl.hpp>
 

@@ -172,10 +172,10 @@
     # For AMD the specific GPU has to be specified with --offload-arch
     if not any([f.startswith("--offload-arch") for f in additional_flags]):
         # If the offload arch wasn't specified in SYCL_CLANG_EXTRA_FLAGS,
-        # hardcode it to gfx906, this is fine because only compiler tests
+        # hardcode it to gfx90a, this is fine because only compiler tests
         additional_flags += [
             "-Xsycl-target-backend=amdgcn-amd-amdhsa",
-            "--offload-arch=gfx906",
+            "--offload-arch=gfx90a",
         ]
 
 config.sycl_headers_filter = lit_config.params.get("SYCL_HEADERS_FILTER", None)