Add R600 target builtins for div_scale.

Reviewer summary (this text precedes the first "Index:" header and is not
part of any hunk):

  * include/clang/Basic/TargetBuiltins.h: new clang::R600 builtin ID enum,
    generated from clang/Basic/BuiltinsR600.def.
  * include/clang/module.modulemap: exclude Basic/BuiltinsR600.def alongside
    the other Builtins*.def headers.
  * lib/Basic/Targets.cpp: R600TargetInfo::getTargetBuiltins now returns a
    BuiltinInfo table built from BuiltinsR600.def instead of an empty list.
  * lib/CodeGen/CGBuiltin.cpp, lib/CodeGen/CodeGenFunction.h:
    EmitR600BuiltinExpr lowers __builtin_amdgpu_div_scale and
    __builtin_amdgpu_div_scalef to the AMDGPU_div_scale intrinsic. Element 0
    of the intrinsic's struct result is returned; element 1 (the flag) is
    zero-extended to the pointee type of the fourth argument and stored
    through it with that pointer's alignment.
  * test/CodeGenOpenCL/builtins-r600.cl: FileCheck coverage for the f64 and
    f32 variants.

NOTE(review): clang/Basic/BuiltinsR600.def is #included by these hunks but is
not added in the portion of the patch shown here; confirm it is part of the
change.
NOTE(review): leading whitespace on context lines was reconstructed; confirm
every hunk still applies cleanly to the tree.

Index: include/clang/Basic/TargetBuiltins.h
===================================================================
--- include/clang/Basic/TargetBuiltins.h
+++ include/clang/Basic/TargetBuiltins.h
@@ -72,6 +72,15 @@
     };
   }
 
+  /// \brief R600 builtins
+  namespace R600 {
+  enum {
+    LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
+  #define BUILTIN(ID, TYPE, ATTRS) BI##ID,
+  #include "clang/Basic/BuiltinsR600.def"
+    LastTSBuiltin
+  };
+  }
 
   /// \brief X86 builtins
   namespace X86 {
Index: include/clang/module.modulemap
===================================================================
--- include/clang/module.modulemap
+++ include/clang/module.modulemap
@@ -36,6 +36,7 @@
     exclude header "Basic/BuiltinsNEON.def"
     exclude header "Basic/BuiltinsNVPTX.def"
     exclude header "Basic/BuiltinsPPC.def"
+    exclude header "Basic/BuiltinsR600.def"
     exclude header "Basic/BuiltinsX86.def"
     exclude header "Basic/BuiltinsXCore.def"
     exclude header "Basic/DiagnosticOptions.def"
Index: lib/Basic/Targets.cpp
===================================================================
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -1458,6 +1458,8 @@
   "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
 
 class R600TargetInfo : public TargetInfo {
+  static const Builtin::Info BuiltinInfo[];
+
   /// \brief The GPU profiles supported by the R600 target.
   enum GPUKind {
     GK_NONE,
@@ -1504,11 +1506,10 @@
 
   void getTargetBuiltins(const Builtin::Info *&Records,
                          unsigned &NumRecords) const override {
-    Records = nullptr;
-    NumRecords = 0;
+    Records = BuiltinInfo;
+    NumRecords = clang::R600::LastTSBuiltin - Builtin::FirstTSBuiltin;
   }
 
-
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override {
     Builder.defineMacro("__R600__");
@@ -1584,6 +1585,12 @@
   }
 };
 
+const Builtin::Info R600TargetInfo::BuiltinInfo[] = {
+#define BUILTIN(ID, TYPE, ATTRS) \
+  { #ID, TYPE, ATTRS, 0, ALL_LANGUAGES },
+#include "clang/Basic/BuiltinsR600.def"
+};
+
 } // end anonymous namespace
 
 namespace {
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -1689,6 +1689,8 @@
   case llvm::Triple::ppc64:
   case llvm::Triple::ppc64le:
     return EmitPPCBuiltinExpr(BuiltinID, E);
+  case llvm::Triple::r600:
+    return EmitR600BuiltinExpr(BuiltinID, E);
   default:
     return nullptr;
   }
@@ -5952,3 +5954,38 @@
     }
   }
 }
+
+Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID,
+                                            const CallExpr *E) {
+  switch (BuiltinID) {
+  case R600::BI__builtin_amdgpu_div_scale:
+  case R600::BI__builtin_amdgpu_div_scalef: {
+    // Translate from the intrinsic's struct return to the builtin's out
+    // argument.
+
+    std::pair<llvm::Value *, unsigned> FlagOutPtr
+      = EmitPointerWithAlignment(E->getArg(3));
+
+    llvm::Value *X = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
+    llvm::Value *Z = EmitScalarExpr(E->getArg(2));
+
+    llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale,
+                                           X->getType());
+
+    llvm::Value *Tmp = Builder.CreateCall3(Callee, X, Y, Z);
+
+    llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
+    llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
+
+    llvm::Type *RealFlagType
+      = FlagOutPtr.first->getType()->getPointerElementType();
+
+    llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
+    llvm::StoreInst *FlagStore = Builder.CreateStore(FlagExt, FlagOutPtr.first);
+    FlagStore->setAlignment(FlagOutPtr.second);
+    return Result;
+  } default:
+    return nullptr;
+  }
+}
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -2252,6 +2252,7 @@
   llvm::Value *BuildVector(ArrayRef<llvm::Value*> Ops);
   llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+  llvm::Value *EmitR600BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
 
   llvm::Value *EmitObjCProtocolExpr(const ObjCProtocolExpr *E);
   llvm::Value *EmitObjCStringLiteral(const ObjCStringLiteral *E);
Index: test/CodeGenOpenCL/builtins-r600.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/builtins-r600.cl
@@ -0,0 +1,30 @@
+// REQUIRES: r600-registered-target
+// RUN: %clang_cc1 -triple r600-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// CHECK-LABEL: @test_div_scale_f64
+// CHECK: call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true)
+// CHECK-DAG: [[FLAG:%.+]] = extractvalue { double, i1 } %{{.+}}, 1
+// CHECK-DAG: [[VAL:%.+]] = extractvalue { double, i1 } %{{.+}}, 0
+// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32
+// CHECK: store i32 [[FLAGEXT]]
+void test_div_scale_f64(global double* out, global int* flagout, double a, double b)
+{
+  bool flag;
+  *out = __builtin_amdgpu_div_scale(a, b, true, &flag);
+  *flagout = flag;
+}
+
+// CHECK-LABEL: @test_div_scale_f32
+// CHECK: call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true)
+// CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1
+// CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0
+// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32
+// CHECK: store i32 [[FLAGEXT]]
+void test_div_scale_f32(global float* out, global int* flagout, float a, float b)
+{
+  bool flag;
+  *out = __builtin_amdgpu_div_scalef(a, b, true, &flag);
+  *flagout = flag;
+}