Index: include/clang/Basic/BuiltinsAMDGPU.def =================================================================== --- include/clang/Basic/BuiltinsAMDGPU.def +++ include/clang/Basic/BuiltinsAMDGPU.def @@ -7,30 +7,36 @@ // //===----------------------------------------------------------------------===// // -// This file defines the R600-specific builtin function database. Users of this -// file must define the BUILTIN macro to make use of this information. +// This file defines the AMDGPU-specific builtin function database. Users of +// this file must define the BUILTIN macro to make use of this information. // //===----------------------------------------------------------------------===// // The format of this database matches clang/Basic/Builtins.def. -BUILTIN(__builtin_amdgpu_div_scale, "dddbb*", "n") -BUILTIN(__builtin_amdgpu_div_scalef, "fffbb*", "n") -BUILTIN(__builtin_amdgpu_div_fmas, "ddddb", "nc") -BUILTIN(__builtin_amdgpu_div_fmasf, "ffffb", "nc") -BUILTIN(__builtin_amdgpu_div_fixup, "dddd", "nc") -BUILTIN(__builtin_amdgpu_div_fixupf, "ffff", "nc") -BUILTIN(__builtin_amdgpu_trig_preop, "ddi", "nc") -BUILTIN(__builtin_amdgpu_trig_preopf, "ffi", "nc") -BUILTIN(__builtin_amdgpu_rcp, "dd", "nc") -BUILTIN(__builtin_amdgpu_rcpf, "ff", "nc") +BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n") +BUILTIN(__builtin_amdgcn_div_scalef, "fffbb*", "n") +BUILTIN(__builtin_amdgcn_div_fmas, "ddddb", "nc") +BUILTIN(__builtin_amdgcn_div_fmasf, "ffffb", "nc") +BUILTIN(__builtin_amdgcn_div_fixup, "dddd", "nc") +BUILTIN(__builtin_amdgcn_div_fixupf, "ffff", "nc") +BUILTIN(__builtin_amdgcn_trig_preop, "ddi", "nc") +BUILTIN(__builtin_amdgcn_trig_preopf, "ffi", "nc") +BUILTIN(__builtin_amdgcn_rcp, "dd", "nc") +BUILTIN(__builtin_amdgcn_rcpf, "ff", "nc") +BUILTIN(__builtin_amdgcn_rsq, "dd", "nc") +BUILTIN(__builtin_amdgcn_rsqf, "ff", "nc") +BUILTIN(__builtin_amdgcn_rsq_clamped, "dd", "nc") +BUILTIN(__builtin_amdgcn_rsq_clampedf, "ff", "nc") +BUILTIN(__builtin_amdgcn_ldexp, "ddi", "nc") 
+BUILTIN(__builtin_amdgcn_ldexpf, "ffi", "nc") +BUILTIN(__builtin_amdgcn_class, "bdi", "nc") +BUILTIN(__builtin_amdgcn_classf, "bfi", "nc") + +// Legacy names with amdgpu prefix BUILTIN(__builtin_amdgpu_rsq, "dd", "nc") BUILTIN(__builtin_amdgpu_rsqf, "ff", "nc") -BUILTIN(__builtin_amdgpu_rsq_clamped, "dd", "nc") -BUILTIN(__builtin_amdgpu_rsq_clampedf, "ff", "nc") BUILTIN(__builtin_amdgpu_ldexp, "ddi", "nc") BUILTIN(__builtin_amdgpu_ldexpf, "ffi", "nc") -BUILTIN(__builtin_amdgpu_class, "bdi", "nc") -BUILTIN(__builtin_amdgpu_classf, "bfi", "nc") #undef BUILTIN Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -6887,8 +6887,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (BuiltinID) { - case AMDGPU::BI__builtin_amdgpu_div_scale: - case AMDGPU::BI__builtin_amdgpu_div_scalef: { + case AMDGPU::BI__builtin_amdgcn_div_scale: + case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out // argument. 
@@ -6898,7 +6898,7 @@ llvm::Value *Y = EmitScalarExpr(E->getArg(1)); llvm::Value *Z = EmitScalarExpr(E->getArg(2)); - llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale, + llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, X->getType()); llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); @@ -6913,40 +6913,54 @@ Builder.CreateStore(FlagExt, FlagOutPtr); return Result; } - case AMDGPU::BI__builtin_amdgpu_div_fmas: - case AMDGPU::BI__builtin_amdgpu_div_fmasf: { + case AMDGPU::BI__builtin_amdgcn_div_fmas: + case AMDGPU::BI__builtin_amdgcn_div_fmasf: { llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); - llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas, + llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, Src0->getType()); llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); } - case AMDGPU::BI__builtin_amdgpu_div_fixup: - case AMDGPU::BI__builtin_amdgpu_div_fixupf: - return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup); - case AMDGPU::BI__builtin_amdgpu_trig_preop: - case AMDGPU::BI__builtin_amdgpu_trig_preopf: - return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop); - case AMDGPU::BI__builtin_amdgpu_rcp: - case AMDGPU::BI__builtin_amdgpu_rcpf: - return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp); + case AMDGPU::BI__builtin_amdgcn_div_fixup: + case AMDGPU::BI__builtin_amdgcn_div_fixupf: + return emitTernaryFPBuiltin(*this, E, Intrinsic::amdgcn_div_fixup); + case AMDGPU::BI__builtin_amdgcn_trig_preop: + case AMDGPU::BI__builtin_amdgcn_trig_preopf: + return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); + case AMDGPU::BI__builtin_amdgcn_rcp: + case AMDGPU::BI__builtin_amdgcn_rcpf: + return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgcn_rcp); + case 
AMDGPU::BI__builtin_amdgcn_rsq: + case AMDGPU::BI__builtin_amdgcn_rsqf: + return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgcn_rsq); + case AMDGPU::BI__builtin_amdgcn_rsq_clamped: + case AMDGPU::BI__builtin_amdgcn_rsq_clampedf: + return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamped); + case AMDGPU::BI__builtin_amdgcn_ldexp: + case AMDGPU::BI__builtin_amdgcn_ldexpf: + return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); + case AMDGPU::BI__builtin_amdgcn_class: + case AMDGPU::BI__builtin_amdgcn_classf: + return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); + + // Legacy amdgpu prefix case AMDGPU::BI__builtin_amdgpu_rsq: - case AMDGPU::BI__builtin_amdgpu_rsqf: - return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq); - case AMDGPU::BI__builtin_amdgpu_rsq_clamped: - case AMDGPU::BI__builtin_amdgpu_rsq_clampedf: - return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped); + case AMDGPU::BI__builtin_amdgpu_rsqf: { + if (getTarget().getTriple().getArch() == Triple::amdgcn) + return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgcn_rsq); + return emitUnaryFPBuiltin(*this, E, Intrinsic::r600_rsq); + } case AMDGPU::BI__builtin_amdgpu_ldexp: - case AMDGPU::BI__builtin_amdgpu_ldexpf: + case AMDGPU::BI__builtin_amdgpu_ldexpf: { + if (getTarget().getTriple().getArch() == Triple::amdgcn) + return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp); - case AMDGPU::BI__builtin_amdgpu_class: - case AMDGPU::BI__builtin_amdgpu_classf: - return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class); - default: + } + default: return nullptr; } } Index: test/CodeGenOpenCL/builtins-amdgcn.cl =================================================================== --- /dev/null +++ test/CodeGenOpenCL/builtins-amdgcn.cl @@ -0,0 +1,173 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s + +#pragma OPENCL EXTENSION cl_khr_fp64 
: enable + +// CHECK-LABEL: @test_div_scale_f64 +// CHECK: call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) +// CHECK-DAG: [[FLAG:%.+]] = extractvalue { double, i1 } %{{.+}}, 1 +// CHECK-DAG: [[VAL:%.+]] = extractvalue { double, i1 } %{{.+}}, 0 +// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32 +// CHECK: store i32 [[FLAGEXT]] +void test_div_scale_f64(global double* out, global int* flagout, double a, double b) +{ + bool flag; + *out = __builtin_amdgcn_div_scale(a, b, true, &flag); + *flagout = flag; +} + +// CHECK-LABEL: @test_div_scale_f32 +// CHECK: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) +// CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1 +// CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0 +// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32 +// CHECK: store i32 [[FLAGEXT]] +void test_div_scale_f32(global float* out, global int* flagout, float a, float b) +{ + bool flag; + *out = __builtin_amdgcn_div_scalef(a, b, true, &flag); + *flagout = flag; +} + +// CHECK-LABEL: @test_div_fmas_f32 +// CHECK: call float @llvm.amdgcn.div.fmas.f32 +void test_div_fmas_f32(global float* out, float a, float b, float c, int d) +{ + *out = __builtin_amdgcn_div_fmasf(a, b, c, d); +} + +// CHECK-LABEL: @test_div_fmas_f64 +// CHECK: call double @llvm.amdgcn.div.fmas.f64 +void test_div_fmas_f64(global double* out, double a, double b, double c, int d) +{ + *out = __builtin_amdgcn_div_fmas(a, b, c, d); +} + +// CHECK-LABEL: @test_div_fixup_f32 +// CHECK: call float @llvm.amdgcn.div.fixup.f32 +void test_div_fixup_f32(global float* out, float a, float b, float c) +{ + *out = __builtin_amdgcn_div_fixupf(a, b, c); +} + +// CHECK-LABEL: @test_div_fixup_f64 +// CHECK: call double @llvm.amdgcn.div.fixup.f64 +void test_div_fixup_f64(global double* out, double a, double b, double c) +{ + *out = __builtin_amdgcn_div_fixup(a, b, c); +} + +// CHECK-LABEL: @test_trig_preop_f32 +// CHECK: call float 
@llvm.amdgcn.trig.preop.f32 +void test_trig_preop_f32(global float* out, float a, int b) +{ + *out = __builtin_amdgcn_trig_preopf(a, b); +} + +// CHECK-LABEL: @test_trig_preop_f64 +// CHECK: call double @llvm.amdgcn.trig.preop.f64 +void test_trig_preop_f64(global double* out, double a, int b) +{ + *out = __builtin_amdgcn_trig_preop(a, b); +} + +// CHECK-LABEL: @test_rcp_f32 +// CHECK: call float @llvm.amdgcn.rcp.f32 +void test_rcp_f32(global float* out, float a) +{ + *out = __builtin_amdgcn_rcpf(a); +} + +// CHECK-LABEL: @test_rcp_f64 +// CHECK: call double @llvm.amdgcn.rcp.f64 +void test_rcp_f64(global double* out, double a) +{ + *out = __builtin_amdgcn_rcp(a); +} + +// CHECK-LABEL: @test_rsq_f32 +// CHECK: call float @llvm.amdgcn.rsq.f32 +void test_rsq_f32(global float* out, float a) +{ + *out = __builtin_amdgcn_rsqf(a); +} + +// CHECK-LABEL: @test_rsq_f64 +// CHECK: call double @llvm.amdgcn.rsq.f64 +void test_rsq_f64(global double* out, double a) +{ + *out = __builtin_amdgcn_rsq(a); +} + +// CHECK-LABEL: @test_rsq_clamped_f32 +// CHECK: call float @llvm.amdgcn.rsq.clamped.f32 +void test_rsq_clamped_f32(global float* out, float a) +{ + *out = __builtin_amdgcn_rsq_clampedf(a); +} + +// CHECK-LABEL: @test_rsq_clamped_f64 +// CHECK: call double @llvm.amdgcn.rsq.clamped.f64 +void test_rsq_clamped_f64(global double* out, double a) +{ + *out = __builtin_amdgcn_rsq_clamped(a); +} + +// CHECK-LABEL: @test_ldexp_f32 +// CHECK: call float @llvm.amdgcn.ldexp.f32 +void test_ldexp_f32(global float* out, float a, int b) +{ + *out = __builtin_amdgcn_ldexpf(a, b); +} + +// CHECK-LABEL: @test_ldexp_f64 +// CHECK: call double @llvm.amdgcn.ldexp.f64 +void test_ldexp_f64(global double* out, double a, int b) +{ + *out = __builtin_amdgcn_ldexp(a, b); +} + +// CHECK-LABEL: @test_class_f32 +// CHECK: call i1 @llvm.amdgcn.class.f32 +void test_class_f32(global float* out, float a, int b) +{ + *out = __builtin_amdgcn_classf(a, b); +} + +// CHECK-LABEL: @test_class_f64 +// CHECK: call i1 
@llvm.amdgcn.class.f64 +void test_class_f64(global double* out, double a, int b) +{ + *out = __builtin_amdgcn_class(a, b); +} + + +// Legacy intrinsics with AMDGPU prefix + +// CHECK-LABEL: @test_legacy_rsq_f32 +// CHECK: call float @llvm.amdgcn.rsq.f32 +void test_legacy_rsq_f32(global float* out, float a) +{ + *out = __builtin_amdgpu_rsqf(a); +} + +// CHECK-LABEL: @test_legacy_rsq_f64 +// CHECK: call double @llvm.amdgcn.rsq.f64 +void test_legacy_rsq_f64(global double* out, double a) +{ + *out = __builtin_amdgpu_rsq(a); +} + +// CHECK-LABEL: @test_legacy_ldexp_f32 +// CHECK: call float @llvm.amdgcn.ldexp.f32 +void test_legacy_ldexp_f32(global float* out, float a, int b) +{ + *out = __builtin_amdgpu_ldexpf(a, b); +} + +// CHECK-LABEL: @test_legacy_ldexp_f64 +// CHECK: call double @llvm.amdgcn.ldexp.f64 +void test_legacy_ldexp_f64(global double* out, double a, int b) +{ + *out = __builtin_amdgpu_ldexp(a, b); +} Index: test/CodeGenOpenCL/builtins-r600.cl =================================================================== --- test/CodeGenOpenCL/builtins-r600.cl +++ test/CodeGenOpenCL/builtins-r600.cl @@ -1,143 +1,32 @@ -// REQUIRES: r600-registered-target +// REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple r600-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s #pragma OPENCL EXTENSION cl_khr_fp64 : enable -// CHECK-LABEL: @test_div_scale_f64 -// CHECK: call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) -// CHECK-DAG: [[FLAG:%.+]] = extractvalue { double, i1 } %{{.+}}, 1 -// CHECK-DAG: [[VAL:%.+]] = extractvalue { double, i1 } %{{.+}}, 0 -// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32 -// CHECK: store i32 [[FLAGEXT]] -void test_div_scale_f64(global double* out, global int* flagout, double a, double b) -{ - bool flag; - *out = __builtin_amdgpu_div_scale(a, b, true, &flag); - *flagout = flag; -} - -// CHECK-LABEL: @test_div_scale_f32 -// 
CHECK: call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) -// CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1 -// CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0 -// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32 -// CHECK: store i32 [[FLAGEXT]] -void test_div_scale_f32(global float* out, global int* flagout, float a, float b) -{ - bool flag; - *out = __builtin_amdgpu_div_scalef(a, b, true, &flag); - *flagout = flag; -} - -// CHECK-LABEL: @test_div_fmas_f32 -// CHECK: call float @llvm.AMDGPU.div.fmas.f32 -void test_div_fmas_f32(global float* out, float a, float b, float c, int d) -{ - *out = __builtin_amdgpu_div_fmasf(a, b, c, d); -} - -// CHECK-LABEL: @test_div_fmas_f64 -// CHECK: call double @llvm.AMDGPU.div.fmas.f64 -void test_div_fmas_f64(global double* out, double a, double b, double c, int d) -{ - *out = __builtin_amdgpu_div_fmas(a, b, c, d); -} - -// CHECK-LABEL: @test_div_fixup_f32 -// CHECK: call float @llvm.AMDGPU.div.fixup.f32 -void test_div_fixup_f32(global float* out, float a, float b, float c) -{ - *out = __builtin_amdgpu_div_fixupf(a, b, c); -} - -// CHECK-LABEL: @test_div_fixup_f64 -// CHECK: call double @llvm.AMDGPU.div.fixup.f64 -void test_div_fixup_f64(global double* out, double a, double b, double c) -{ - *out = __builtin_amdgpu_div_fixup(a, b, c); -} - -// CHECK-LABEL: @test_trig_preop_f32 -// CHECK: call float @llvm.AMDGPU.trig.preop.f32 -void test_trig_preop_f32(global float* out, float a, int b) -{ - *out = __builtin_amdgpu_trig_preopf(a, b); -} - -// CHECK-LABEL: @test_trig_preop_f64 -// CHECK: call double @llvm.AMDGPU.trig.preop.f64 -void test_trig_preop_f64(global double* out, double a, int b) -{ - *out = __builtin_amdgpu_trig_preop(a, b); -} - -// CHECK-LABEL: @test_rcp_f32 -// CHECK: call float @llvm.AMDGPU.rcp.f32 -void test_rcp_f32(global float* out, float a) -{ - *out = __builtin_amdgpu_rcpf(a); -} - -// CHECK-LABEL: @test_rcp_f64 -// CHECK: call double @llvm.AMDGPU.rcp.f64 
-void test_rcp_f64(global double* out, double a) -{ - *out = __builtin_amdgpu_rcp(a); -} - // CHECK-LABEL: @test_rsq_f32 -// CHECK: call float @llvm.AMDGPU.rsq.f32 +// CHECK: call float @llvm.r600.rsq.f32 void test_rsq_f32(global float* out, float a) { *out = __builtin_amdgpu_rsqf(a); } // CHECK-LABEL: @test_rsq_f64 -// CHECK: call double @llvm.AMDGPU.rsq.f64 +// CHECK: call double @llvm.r600.rsq.f64 void test_rsq_f64(global double* out, double a) { *out = __builtin_amdgpu_rsq(a); } -// CHECK-LABEL: @test_rsq_clamped_f32 -// CHECK: call float @llvm.AMDGPU.rsq.clamped.f32 -void test_rsq_clamped_f32(global float* out, float a) -{ - *out = __builtin_amdgpu_rsq_clampedf(a); -} - -// CHECK-LABEL: @test_rsq_clamped_f64 -// CHECK: call double @llvm.AMDGPU.rsq.clamped.f64 -void test_rsq_clamped_f64(global double* out, double a) -{ - *out = __builtin_amdgpu_rsq_clamped(a); -} - -// CHECK-LABEL: @test_ldexp_f32 +// CHECK-LABEL: @test_legacy_ldexp_f32 // CHECK: call float @llvm.AMDGPU.ldexp.f32 -void test_ldexp_f32(global float* out, float a, int b) +void test_legacy_ldexp_f32(global float* out, float a, int b) { *out = __builtin_amdgpu_ldexpf(a, b); } -// CHECK-LABEL: @test_ldexp_f64 +// CHECK-LABEL: @test_legacy_ldexp_f64 // CHECK: call double @llvm.AMDGPU.ldexp.f64 -void test_ldexp_f64(global double* out, double a, int b) +void test_legacy_ldexp_f64(global double* out, double a, int b) { *out = __builtin_amdgpu_ldexp(a, b); } - -// CHECK-LABEL: @test_class_f32 -// CHECK: call i1 @llvm.AMDGPU.class.f32 -void test_class_f32(global float* out, float a, int b) -{ - *out = __builtin_amdgpu_classf(a, b); -} - -// CHECK-LABEL: @test_class_f64 -// CHECK: call i1 @llvm.AMDGPU.class.f64 -void test_class_f64(global double* out, double a, int b) -{ - *out = __builtin_amdgpu_class(a, b); -}