Skip to content

Commit adfb9c0

Browse files
committed
[AMDGPU][clang] provide device implementation for __builtin_logb and __builtin_scalbn
Clang generates library calls for __builtin_* functions which can be a problem for GPUs that cannot handle them. This patch generates a device implementations for __builtin_logb and __builtin_scalbn.
1 parent 87916f8 commit adfb9c0

File tree

3 files changed

+89
-1
lines changed

3 files changed

+89
-1
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

+42-1
Original file line numberDiff line numberDiff line change
@@ -6621,10 +6621,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
66216621
}
66226622
}
66236623

6624+
// These will be emitted as Intrinsic later.
6625+
auto NeedsDeviceOverloadToIntrin = [&](unsigned BuiltinID) {
6626+
if (getTarget().getTriple().isAMDGCN()) {
6627+
switch (BuiltinID) {
6628+
default:
6629+
return false;
6630+
case Builtin::BIlogb:
6631+
case Builtin::BI__builtin_logb:
6632+
case Builtin::BIscalbn:
6633+
case Builtin::BI__builtin_scalbn:
6634+
return true;
6635+
}
6636+
}
6637+
return false;
6638+
};
6639+
66246640
// If this is an alias for a lib function (e.g. __builtin_sin), emit
66256641
// the call using the normal call path, but using the unmangled
66266642
// version of the function name.
6627-
if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6643+
if (!NeedsDeviceOverloadToIntrin(BuiltinID) &&
6644+
getContext().BuiltinInfo.isLibFunction(BuiltinID))
66286645
return emitLibraryCall(*this, FD, E,
66296646
CGM.getBuiltinLibFunction(FD, BuiltinID));
66306647

@@ -20910,6 +20927,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
2091020927
case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
2091120928
return emitBuiltinWithOneOverloadedType<2>(
2091220929
*this, E, Intrinsic::amdgcn_s_prefetch_data);
20930+
case Builtin::BIlogb:
20931+
case Builtin::BI__builtin_logb: {
20932+
auto Src0 = EmitScalarExpr(E->getArg(0));
20933+
auto FrExpFunc = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
20934+
{Builder.getInt32Ty(), Src0->getType()});
20935+
auto FrExp = Builder.CreateCall(FrExpFunc, Src0);
20936+
auto Add = Builder.CreateAdd(
20937+
FrExp, ConstantInt::getSigned(FrExp->getType(), -1), "", false, true);
20938+
auto SIToFP = Builder.CreateSIToFP(Add, Builder.getDoubleTy());
20939+
auto Fabs = emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs);
20940+
auto FCmpONE = Builder.CreateFCmpONE(
20941+
Fabs, ConstantFP::getInfinity(Builder.getDoubleTy()));
20942+
auto Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
20943+
auto FCmpOEQ =
20944+
Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getDoubleTy()));
20945+
auto Sel2 = Builder.CreateSelect(
20946+
FCmpOEQ, ConstantFP::getInfinity(Builder.getDoubleTy(), /*Neg*/ true),
20947+
Sel1);
20948+
return Sel2;
20949+
}
20950+
case Builtin::BIscalbn:
20951+
case Builtin::BI__builtin_scalbn:
20952+
return emitBinaryExpMaybeConstrainedFPBuiltin(
20953+
*this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);
2091320954
default:
2091420955
return nullptr;
2091520956
}

clang/lib/CodeGen/CodeGenModule.h

+5
Original file line numberDiff line numberDiff line change
@@ -1231,6 +1231,11 @@ class CodeGenModule : public CodeGenTypeCache {
12311231
llvm::FunctionType *FnType = nullptr, bool DontDefer = false,
12321232
ForDefinition_t IsForDefinition = NotForDefinition);
12331233

1234+
/// Given a builtin id for a function, return a Function* for device
1235+
/// overload implementation.
1236+
llvm::Constant *getDeviceLibFunction(const FunctionDecl *FD,
1237+
unsigned BuiltinID);
1238+
12341239
/// Given a builtin id for a function like "__builtin_fabsf", return a
12351240
/// Function* for "fabsf".
12361241
llvm::Constant *getBuiltinLibFunction(const FunctionDecl *FD,

clang/test/CodeGenHIP/logb_scalbn.hip

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang --cuda-device-only -nogpuinc -nogpulib -emit-llvm -S -o - %s | FileCheck %s
3+
#include <math.h>
4+
#define __device__ __attribute__((device))
5+
6+
// CHECK-LABEL: define hidden void @_Z9my_kernelv(
7+
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
8+
// CHECK-NEXT: [[ENTRY:.*:]]
9+
// CHECK-NEXT: [[D1:%.*]] = alloca float, align 4, addrspace(5)
10+
// CHECK-NEXT: [[D2:%.*]] = alloca float, align 4, addrspace(5)
11+
// CHECK-NEXT: [[D3:%.*]] = alloca float, align 4, addrspace(5)
12+
// CHECK-NEXT: [[D4:%.*]] = alloca float, align 4, addrspace(5)
13+
// CHECK-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D1]] to ptr
14+
// CHECK-NEXT: [[D2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D2]] to ptr
15+
// CHECK-NEXT: [[D3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D3]] to ptr
16+
// CHECK-NEXT: [[D4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D4]] to ptr
17+
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double 1.600000e+01)
18+
// CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[TMP0]], -1
19+
// CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[TMP1]] to double
20+
// CHECK-NEXT: [[TMP3:%.*]] = call contract double @llvm.fabs.f64(double 1.600000e+01)
21+
// CHECK-NEXT: [[TMP4:%.*]] = fcmp contract one double [[TMP3]], 0x7FF0000000000000
22+
// CHECK-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP4]], double [[TMP2]], double [[TMP3]]
23+
// CHECK-NEXT: [[TMP6:%.*]] = select contract i1 false, double 0xFFF0000000000000, double [[TMP5]]
24+
// CHECK-NEXT: [[CONV:%.*]] = fptrunc contract double [[TMP6]] to float
25+
// CHECK-NEXT: store float [[CONV]], ptr [[D1_ASCAST]], align 4
26+
// CHECK-NEXT: [[CALL:%.*]] = call contract noundef float @_ZSt4logbf(float noundef 1.600000e+01) #[[ATTR3:[0-9]+]]
27+
// CHECK-NEXT: store float [[CALL]], ptr [[D2_ASCAST]], align 4
28+
// CHECK-NEXT: [[TMP7:%.*]] = call contract double @llvm.ldexp.f64.i32(double 1.600000e+01, i32 10)
29+
// CHECK-NEXT: [[CONV1:%.*]] = fptrunc contract double [[TMP7]] to float
30+
// CHECK-NEXT: store float [[CONV1]], ptr [[D3_ASCAST]], align 4
31+
// CHECK-NEXT: [[CALL2:%.*]] = call contract noundef float @_ZSt6scalbnfi(float noundef 9.000000e+00, i32 noundef 4) #[[ATTR3]]
32+
// CHECK-NEXT: store float [[CALL2]], ptr [[D4_ASCAST]], align 4
33+
// CHECK-NEXT: ret void
34+
//
35+
__device__ void my_kernel(){
36+
37+
float D1 = __builtin_logb((float)16);
38+
float D2 = logb((float)16);
39+
float D3 = __builtin_scalbn((float)16, 10);
40+
float D4 = scalbn((float)9.0, 4);
41+
}
42+

0 commit comments

Comments
 (0)