Skip to content

Commit 54076ba

Browse files
committed
[AMDGPU][clang] provide device implementation for __builtin_logb and __builtin_scalbn
Clang generates library calls for __builtin_* functions, which can be a problem for GPUs that cannot handle them. This patch generates device implementations for __builtin_logb and __builtin_scalbn.
1 parent 8d67d00 commit 54076ba

File tree

3 files changed

+92
-1
lines changed

3 files changed

+92
-1
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

+18-1
Original file line numberDiff line numberDiff line change
@@ -6011,10 +6011,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
60116011
}
60126012
}
60136013

6014+
// These will be emitted as Intrinsic later.
6015+
auto NeedsDeviceOverload = [&](unsigned BuiltinID) {
6016+
if (getTarget().getTriple().isAMDGCN()) {
6017+
switch (BuiltinID) {
6018+
default:
6019+
return false;
6020+
case Builtin::BIlogb:
6021+
case Builtin::BI__builtin_logb:
6022+
case Builtin::BIscalbn:
6023+
case Builtin::BI__builtin_scalbn:
6024+
return true;
6025+
}
6026+
}
6027+
return false;
6028+
};
6029+
60146030
// If this is an alias for a lib function (e.g. __builtin_sin), emit
60156031
// the call using the normal call path, but using the unmangled
60166032
// version of the function name.
6017-
if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6033+
if (!NeedsDeviceOverload(BuiltinID) &&
6034+
getContext().BuiltinInfo.isLibFunction(BuiltinID))
60186035
return emitLibraryCall(*this, FD, E,
60196036
CGM.getBuiltinLibFunction(FD, BuiltinID));
60206037

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

+45
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,27 @@ using namespace CodeGen;
3131
using namespace llvm;
3232

3333
namespace {
34+
35+
// Has second type mangled argument.
36+
static Value *
37+
emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E,
38+
Intrinsic::ID IntrinsicID,
39+
Intrinsic::ID ConstrainedIntrinsicID) {
40+
llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
41+
llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
42+
43+
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
44+
if (CGF.Builder.getIsFPConstrained()) {
45+
Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
46+
{Src0->getType(), Src1->getType()});
47+
return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
48+
}
49+
50+
Function *F =
51+
CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
52+
return CGF.Builder.CreateCall(F, {Src0, Src1});
53+
}
54+
3455
// If \p E is not null pointer, insert address space cast to match return
3556
// type of \p E if necessary.
3657
Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
@@ -1876,6 +1897,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18761897
case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
18771898
return emitBuiltinWithOneOverloadedType<2>(
18781899
*this, E, Intrinsic::amdgcn_s_prefetch_data);
1900+
case Builtin::BIlogb:
case Builtin::BI__builtin_logb: {
  // Inline device lowering of logb(x):
  //   x == 0      -> -infinity
  //   |x| == inf  -> |x| (i.e. +infinity)
  //   x is NaN    -> fabs(x); the ordered 'one' compare below is false for
  //                  NaN, so the fabs result is selected
  //   otherwise   -> (double)(frexp_exp(x) - 1)
  //
  // The argument is emitted exactly once and reused for every use below, so
  // an operand with side effects (e.g. logb(x++)) is not evaluated twice.
  Value *Src0 = EmitScalarExpr(E->getArg(0));
  // Derive the FP type from the operand instead of hard-coding double; for
  // logb/__builtin_logb the operand is double, so the emitted IR is the same.
  llvm::Type *FPTy = Src0->getType();

  Function *FrExpFunc = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
                                         {Builder.getInt32Ty(), FPTy});
  Value *FrExp = Builder.CreateCall(FrExpFunc, Src0);
  // Subtract one from the frexp exponent; the add is marked 'nsw'.
  Value *Add = Builder.CreateAdd(
      FrExp, ConstantInt::getSigned(FrExp->getType(), -1), "",
      /*HasNUW=*/false, /*HasNSW=*/true);
  Value *SIToFP = Builder.CreateSIToFP(Add, FPTy);

  // Call llvm.fabs directly on the already-emitted operand rather than going
  // through a helper that takes the CallExpr and emits its argument again.
  Function *FabsFunc = CGM.getIntrinsic(Intrinsic::fabs, FPTy);
  Value *Fabs = Builder.CreateCall(FabsFunc, Src0);

  Value *IsNotInf =
      Builder.CreateFCmpONE(Fabs, ConstantFP::getInfinity(FPTy));
  Value *FiniteOrSpecial = Builder.CreateSelect(IsNotInf, SIToFP, Fabs);
  Value *IsZero = Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(FPTy));
  return Builder.CreateSelect(
      IsZero, ConstantFP::getInfinity(FPTy, /*Negative=*/true),
      FiniteOrSpecial);
}
case Builtin::BIscalbn:
case Builtin::BI__builtin_scalbn:
  // scalbn(x, n) is emitted as llvm.ldexp (or its constrained variant),
  // overloaded on both the FP operand type and the integer exponent type.
  return emitBinaryExpMaybeConstrainedFPBuiltin(
      *this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);
18791924
default:
18801925
return nullptr;
18811926
}

clang/test/CodeGen/logb_scalbn.c

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
3+
4+
// CHECK-LABEL: define dso_local void @my_kernel(
5+
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
6+
// CHECK-NEXT: [[ENTRY:.*:]]
7+
// CHECK-NEXT: [[D1:%.*]] = alloca float, align 4, addrspace(5)
8+
// CHECK-NEXT: [[D2:%.*]] = alloca float, align 4, addrspace(5)
9+
// CHECK-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D1]] to ptr
10+
// CHECK-NEXT: [[D2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D2]] to ptr
11+
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double 1.600000e+01)
12+
// CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[TMP0]], -1
13+
// CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[TMP1]] to double
14+
// CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.fabs.f64(double 1.600000e+01)
15+
// CHECK-NEXT: [[TMP4:%.*]] = fcmp one double [[TMP3]], 0x7FF0000000000000
16+
// CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], double [[TMP2]], double [[TMP3]]
17+
// CHECK-NEXT: [[TMP6:%.*]] = select i1 false, double 0xFFF0000000000000, double [[TMP5]]
18+
// CHECK-NEXT: [[CONV:%.*]] = fptrunc double [[TMP6]] to float
19+
// CHECK-NEXT: store float [[CONV]], ptr [[D1_ASCAST]], align 4
20+
// CHECK-NEXT: [[TMP7:%.*]] = call double @llvm.ldexp.f64.i32(double 1.600000e+01, i32 10)
21+
// CHECK-NEXT: [[CONV1:%.*]] = fptrunc double [[TMP7]] to float
22+
// CHECK-NEXT: store float [[CONV1]], ptr [[D2_ASCAST]], align 4
23+
// CHECK-NEXT: ret void
24+
//
25+
// Exercises the amdgcn lowering of __builtin_logb and __builtin_scalbn. Both
// builtins operate on double here: per the CHECK lines above, the calls are
// emitted on f64 and each result is fptrunc'ed back to float for the store.
// The CHECK lines are autogenerated; regenerate them with
// utils/update_cc_test_checks.py if this function changes.
void my_kernel(){
26+
float D1 = __builtin_logb((float)16);
27+
float D2 = __builtin_scalbn((float)16, 10);
28+
}
29+

0 commit comments

Comments
 (0)