Skip to content

Commit 5d1e144

Browse files
[SYCL][Driver] Enable multiple third-party targets for SYCL AOT. (#18145)
This patch enables the `-fsycl-targets` option to support a comma-separated list of NVIDIA and AMD Processor Graphics values as target triple values for the SYCL AOT flow.

**Example:**
```
clang++ -### -fsycl -fsycl-targets=nvidia_gpu_sm_60,nvidia_gpu_sm_70 syclfile.cpp
clang++ -### -fsycl -fsycl-targets=amd_gpu_gfx90a,amd_gpu_gfx90c syclfile.cpp
```

---------

Signed-off-by: srividya sundaram <[email protected]>
1 parent c752cce commit 5d1e144

10 files changed

+56
-40
lines changed

clang/lib/Driver/Driver.cpp

+21-29
Original file line numberDiff line numberDiff line change
@@ -5166,19 +5166,6 @@ class OffloadingActionBuilder final {
51665166
: DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL, OAB),
51675167
SYCLInstallation(C.getDriver()) {}
51685168

5169-
void withBoundArchForToolChain(const ToolChain *TC,
5170-
llvm::function_ref<void(const char *)> Op) {
5171-
for (auto &A : GpuArchList) {
5172-
if (TC->getTriple() == A.first) {
5173-
Op(A.second ? Args.MakeArgString(A.second) : nullptr);
5174-
return;
5175-
}
5176-
}
5177-
5178-
// no bound arch for this toolchain
5179-
Op(nullptr);
5180-
}
5181-
51825169
void pushForeignAction(Action *A) override {
51835170
// Accept a foreign action from the CudaActionBuilder for compiling CUDA
51845171
// sources
@@ -5461,10 +5448,13 @@ class OffloadingActionBuilder final {
54615448
return;
54625449

54635450
OffloadAction::DeviceDependences Dep;
5464-
withBoundArchForToolChain(ToolChains.front(), [&](const char *BoundArch) {
5465-
Dep.add(*SYCLLinkBinary, *ToolChains.front(), BoundArch,
5466-
Action::OFK_SYCL);
5467-
});
5451+
for (auto &TripleAndArchPair : GpuArchList) {
5452+
if (ToolChains.front()->getTriple() == TripleAndArchPair.first) {
5453+
Dep.add(*SYCLLinkBinary, *ToolChains.front(),
5454+
TripleAndArchPair.second, Action::OFK_SYCL);
5455+
}
5456+
}
5457+
54685458
AL.push_back(C.MakeAction<OffloadAction>(Dep, SYCLLinkBinary->getType()));
54695459
SYCLLinkBinary = nullptr;
54705460
}
@@ -5908,9 +5898,7 @@ class OffloadingActionBuilder final {
59085898
}
59095899
if (SkipWrapper) {
59105900
// Wrapper step not requested.
5911-
withBoundArchForToolChain(TC, [&](const char *BoundArch) {
5912-
addDeps(WrapperInputs.front(), TC, BoundArch);
5913-
});
5901+
addDeps(WrapperInputs.front(), TC, BoundArch);
59145902
continue;
59155903
}
59165904

@@ -5924,9 +5912,7 @@ class OffloadingActionBuilder final {
59245912
BoundArch != nullptr);
59255913
addDeps(DeviceWrappingAction, TC, AddBA ? BoundArch : nullptr);
59265914
} else {
5927-
withBoundArchForToolChain(TC, [&](const char *BoundArch) {
5928-
addDeps(DeviceWrappingAction, TC, BoundArch);
5929-
});
5915+
addDeps(DeviceWrappingAction, TC, BoundArch);
59305916
}
59315917
}
59325918
}
@@ -6447,14 +6433,20 @@ class OffloadingActionBuilder final {
64476433
SYCLTargetInfoList.emplace_back(*TCIt, nullptr);
64486434
} else {
64496435
const char *OffloadArch = nullptr;
6450-
for (auto &A : GpuArchList) {
6451-
if (TT == A.first) {
6452-
OffloadArch = A.second;
6453-
break;
6436+
for (auto &TargetTripleArchPair : GpuArchList) {
6437+
if (TT == TargetTripleArchPair.first) {
6438+
OffloadArch = TargetTripleArchPair.second;
6439+
// Add an arch to the SYCLTargetInfoList
6440+
// only if it is not already present in the list.
6441+
auto Arch = llvm::find_if(
6442+
SYCLTargetInfoList, [&](auto &DeviceTargetInfo) {
6443+
return OffloadArch == DeviceTargetInfo.BoundArch;
6444+
});
6445+
6446+
if (Arch == SYCLTargetInfoList.end())
6447+
SYCLTargetInfoList.emplace_back(*TCIt, OffloadArch);
64546448
}
64556449
}
6456-
assert(OffloadArch && "Failed to find matching arch.");
6457-
SYCLTargetInfoList.emplace_back(*TCIt, OffloadArch);
64586450
}
64596451
}
64606452
}

clang/test/Driver/sycl-offload-new-driver.c

+12
Original file line numberDiff line numberDiff line change
@@ -211,3 +211,15 @@
211211
// RUN: %clangxx -fsycl -### --offload-new-driver %s 2>&1 \
212212
// RUN: | FileCheck -check-prefix CHECK_NO_DYNAMIC_LINKING %s
213213
// CHECK_NO_DYNAMIC_LINKING-NOT: clang-linker-wrapper{{.*}} "-sycl-allow-device-image-dependencies"
214+
215+
// Check that -fsycl-targets correctly processes multiple NVIDIA
216+
// and AMD GPU targets.
217+
// RUN: %clang -### -fsycl -fsycl-targets=nvidia_gpu_sm_60,nvidia_gpu_sm_70 -nocudalib --offload-new-driver %s 2>&1 \
218+
// RUN: | FileCheck -check-prefixes=CHK-MACRO-SM-60,CHK-MACRO-SM-70 %s
219+
// CHK-MACRO-SM-60: clang{{.*}} "-fsycl-is-device"{{.*}} "-D__SYCL_TARGET_NVIDIA_GPU_SM_60__"{{.*}}
220+
// CHK-MACRO-SM-70: clang{{.*}} "-fsycl-is-device"{{.*}} "-D__SYCL_TARGET_NVIDIA_GPU_SM_70__"{{.*}}
221+
// RUN: %clang -### -fsycl -fsycl-targets=amd_gpu_gfx90a,amd_gpu_gfx90c -fno-sycl-libspirv -nogpulib --offload-new-driver %s 2>&1 \
222+
// RUN: | FileCheck -check-prefixes=CHK-MACRO-GFX90A,CHK-MACRO-GFX90C %s
223+
// CHK-MACRO-GFX90A: clang{{.*}} "-fsycl-is-device"{{.*}} "-D__SYCL_TARGET_AMD_GPU_GFX90A__"{{.*}}
224+
// CHK-MACRO-GFX90C: clang{{.*}} "-fsycl-is-device"{{.*}} "-D__SYCL_TARGET_AMD_GPU_GFX90C__"{{.*}}
225+

clang/test/Driver/sycl-offload-old-model.c

+12
Original file line numberDiff line numberDiff line change
@@ -861,3 +861,15 @@
861861
// FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK: --dependent-lib=sycl{{[0-9]*}}-previewd
862862
// FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}.lib
863863
// FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}-preview.lib
864+
865+
// Check that -fsycl-targets correctly processes multiple NVIDIA
866+
// and AMD GPU targets.
867+
// RUN: %clang -### -fsycl -fsycl-targets=nvidia_gpu_sm_60,nvidia_gpu_sm_70 -nocudalib --no-offload-new-driver %s 2>&1 \
868+
// RUN: | FileCheck -check-prefixes=CHK-MACRO-SM-60,CHK-MACRO-SM-70 %s
869+
// CHK-MACRO-SM-60: clang{{.*}} "-fsycl-is-device"{{.*}} "-D__SYCL_TARGET_NVIDIA_GPU_SM_60__"{{.*}}
870+
// CHK-MACRO-SM-70: clang{{.*}} "-fsycl-is-device"{{.*}} "-D__SYCL_TARGET_NVIDIA_GPU_SM_70__"{{.*}}
871+
// RUN: %clang -### -fsycl -fsycl-targets=amd_gpu_gfx90a,amd_gpu_gfx90c -fno-sycl-libspirv -nogpulib --no-offload-new-driver %s 2>&1 \
872+
// RUN: | FileCheck -check-prefixes=CHK-MACRO-GFX90A,CHK-MACRO-GFX90C %s
873+
// CHK-MACRO-GFX90A: clang{{.*}} "-fsycl-is-device"{{.*}} "-D__SYCL_TARGET_AMD_GPU_GFX90A__"{{.*}}
874+
// CHK-MACRO-GFX90C: clang{{.*}} "-fsycl-is-device"{{.*}} "-D__SYCL_TARGET_AMD_GPU_GFX90C__"{{.*}}
875+

sycl/doc/UsersManual.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ and not recommended to use in production environment.
4545
currently overrides all the other specified SYCL targets when enabled.)
4646

4747
Special target values specific to Intel, NVIDIA and AMD Processor Graphics
48-
support are accepted, providing a streamlined interface for AOT. Only one of
49-
these values at a time is supported.
48+
support are accepted, providing a streamlined interface for AOT.
49+
A comma-separated list of valid Intel, NVIDIA and AMD Processor Graphics values is supported.
5050
* intel_gpu_ptl_u, intel_gpu_30_1_1 - Panther Lake U Intel graphics architecture
5151
* intel_gpu_ptl_h, intel_gpu_30_0_4 - Panther Lake H Intel graphics architecture
5252
* intel_gpu_lnl_m, intel_gpu_20_4_4 - Lunar Lake Intel graphics architecture

sycl/test/check_device_code/hip/atomic/amdgpu_unsafe_atomics.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// REQUIRES: hip
2-
// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx906 %s -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SAFE
3-
// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx906 %s -mllvm --amdgpu-oclc-unsafe-int-atomics=true -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
4-
// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx90a %s -mllvm --amdgpu-oclc-unsafe-fp-atomics=true -mllvm --amdgpu-oclc-unsafe-int-atomics=true -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE-FP
2+
// RUN: %clangxx -fsycl -fsycl-targets=amdgcn-amd-amdhsa %s -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SAFE
3+
// RUN: %clangxx -fsycl -fsycl-targets=amdgcn-amd-amdhsa %s -mllvm --amdgpu-oclc-unsafe-int-atomics=true -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
4+
// RUN: %clangxx -fsycl -fsycl-targets=amdgcn-amd-amdhsa %s -mllvm --amdgpu-oclc-unsafe-fp-atomics=true -mllvm --amdgpu-oclc-unsafe-int-atomics=true -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE-FP
55

66
#include <sycl/sycl.hpp>
77

sycl/test/check_device_code/hip/matrix/matrix-hip-bfloat16-float-test.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// REQUIRES: hip
2-
// RUN: %clangxx -fsycl-device-only -fsycl-targets=amd_gpu_gfx90a -S %s -o -| FileCheck %s
2+
// RUN: %clangxx -fsycl-device-only -fsycl-targets=amdgcn-amd-amdhsa -S %s -o -| FileCheck %s
33

44
#include <sycl/sycl.hpp>
55

sycl/test/check_device_code/hip/matrix/matrix-hip-double-double-test.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// REQUIRES: hip
2-
// RUN: %clangxx -fsycl-device-only -fsycl-targets=amd_gpu_gfx90a -S %s -o -| FileCheck %s
2+
// RUN: %clangxx -fsycl-device-only -fsycl-targets=amdgcn-amd-amdhsa -S %s -o -| FileCheck %s
33

44
#include <sycl/sycl.hpp>
55

sycl/test/check_device_code/hip/matrix/matrix-hip-half-float-test.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// REQUIRES: hip
2-
// RUN: %clangxx -fsycl-device-only -fsycl-targets=amd_gpu_gfx90a -S %s -o -| FileCheck %s
2+
// RUN: %clangxx -fsycl-device-only -fsycl-targets=amdgcn-amd-amdhsa -S %s -o -| FileCheck %s
33

44
#include <sycl/sycl.hpp>
55

sycl/test/check_device_code/hip/matrix/matrix-hip-int8-int32-test.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// REQUIRES: hip
2-
// RUN: %clangxx -fsycl-device-only -fsycl-targets=amd_gpu_gfx90a -S %s -o -| FileCheck %s
2+
// RUN: %clangxx -fsycl-device-only -fsycl-targets=amdgcn-amd-amdhsa -S %s -o -| FileCheck %s
33

44
#include <sycl/sycl.hpp>
55

sycl/test/lit.cfg.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -177,10 +177,10 @@
177177
# For AMD the specific GPU has to be specified with --offload-arch
178178
if not any([f.startswith("--offload-arch") for f in additional_flags]):
179179
# If the offload arch wasn't specified in SYCL_CLANG_EXTRA_FLAGS,
180-
# hardcode it to gfx906, this is fine because only compiler tests
180+
# hardcode it to gfx90a, this is fine because only compiler tests
181181
additional_flags += [
182182
"-Xsycl-target-backend=amdgcn-amd-amdhsa",
183-
"--offload-arch=gfx906",
183+
"--offload-arch=gfx90a",
184184
]
185185

186186
config.sycl_headers_filter = lit_config.params.get("SYCL_HEADERS_FILTER", None)

0 commit comments

Comments
 (0)