Index: clang/lib/Headers/opencl-c.h =================================================================== --- clang/lib/Headers/opencl-c.h +++ clang/lib/Headers/opencl-c.h @@ -8498,13 +8498,35 @@ /** * Compute square root. + * + * Provide inline implementations using the builtin so that we get appropriate + * !fpmath based on -cl-fp32-correctly-rounded-divide-sqrt. The implementation + * should still provide an external definition. */ -float __ovld __cnfn sqrt(float); -float2 __ovld __cnfn sqrt(float2); -float3 __ovld __cnfn sqrt(float3); -float4 __ovld __cnfn sqrt(float4); -float8 __ovld __cnfn sqrt(float8); -float16 __ovld __cnfn sqrt(float16); +inline float __ovld __cnfn sqrt(float __x) { + return __builtin_elementwise_sqrt(__x); +} + +inline float2 __ovld __cnfn sqrt(float2 __x) { + return __builtin_elementwise_sqrt(__x); +} + +inline float3 __ovld __cnfn sqrt(float3 __x) { + return __builtin_elementwise_sqrt(__x); +} + +inline float4 __ovld __cnfn sqrt(float4 __x) { + return __builtin_elementwise_sqrt(__x); +} + +inline float8 __ovld __cnfn sqrt(float8 __x) { + return __builtin_elementwise_sqrt(__x); +} + +inline float16 __ovld __cnfn sqrt(float16 __x) { + return __builtin_elementwise_sqrt(__x); +} + #ifdef cl_khr_fp64 double __ovld __cnfn sqrt(double); double2 __ovld __cnfn sqrt(double2); Index: clang/test/CodeGenOpenCL/sqrt-fpmath.cl =================================================================== --- /dev/null +++ clang/test/CodeGenOpenCL/sqrt-fpmath.cl @@ -0,0 +1,119 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// REQUIRES: amdgpu-registered-target + +// Test with -fdeclare-opencl-builtins +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -target-cpu hawaii -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header 
-cl-fp32-correctly-rounded-divide-sqrt -target-cpu hawaii -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s + +// Test without -fdeclare-opencl-builtins +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -finclude-default-header -target-cpu hawaii -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -target-cpu hawaii -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +// CHECK-LABEL: define dso_local float @call_sqrt_f32 +// CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call float @_Z4sqrtf(float noundef [[X]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: ret float [[CALL]] +// +float call_sqrt_f32(float x) { + return sqrt(x); +} + +// CHECK-LABEL: define dso_local <2 x float> @call_sqrt_v2f32 +// CHECK-SAME: (<2 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef [[X]]) #[[ATTR2]] +// CHECK-NEXT: ret <2 x float> [[CALL]] +// +float2 call_sqrt_v2f32(float2 x) { + return sqrt(x); +} + +// CHECK-LABEL: define dso_local <3 x float> @call_sqrt_v3f32 +// CHECK-SAME: (<3 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef [[X]]) #[[ATTR2]] +// CHECK-NEXT: ret <3 x float> [[CALL]] +// +float3 call_sqrt_v3f32(float3 x) { + return sqrt(x); +} + +// CHECK-LABEL: define dso_local <4 x float> @call_sqrt_v4f32 +// CHECK-SAME: (<4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef [[X]]) #[[ATTR2]] +// CHECK-NEXT: ret <4 x 
float> [[CALL]] +// +float4 call_sqrt_v4f32(float4 x) { + return sqrt(x); +} + +// CHECK-LABEL: define dso_local <8 x float> @call_sqrt_v8f32 +// CHECK-SAME: (<8 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef [[X]]) #[[ATTR2]] +// CHECK-NEXT: ret <8 x float> [[CALL]] +// +float8 call_sqrt_v8f32(float8 x) { + return sqrt(x); +} + +// CHECK-LABEL: define dso_local <16 x float> @call_sqrt_v16f32 +// CHECK-SAME: (<16 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef [[X]]) #[[ATTR2]] +// CHECK-NEXT: ret <16 x float> [[CALL]] +// +float16 call_sqrt_v16f32(float16 x) { + return sqrt(x); +} + +// Not for f64 +// CHECK-LABEL: define dso_local double @call_sqrt_f64 +// CHECK-SAME: (double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call double @_Z4sqrtd(double noundef [[X]]) #[[ATTR2]] +// CHECK-NEXT: ret double [[CALL]] +// +double call_sqrt_f64(double x) { + return sqrt(x); +} + +// Not for f64 +// CHECK-LABEL: define dso_local <2 x double> @call_sqrt_v2f64 +// CHECK-SAME: (<2 x double> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call <2 x double> @_Z4sqrtDv2_d(<2 x double> noundef [[X]]) #[[ATTR2]] +// CHECK-NEXT: ret <2 x double> [[CALL]] +// +double2 call_sqrt_v2f64(double2 x) { + return sqrt(x); +} + +// Not for f16 +// CHECK-LABEL: define dso_local half @call_sqrt_f16 +// CHECK-SAME: (half noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call half @_Z4sqrtDh(half noundef [[X]]) #[[ATTR2]] +// CHECK-NEXT: ret half [[CALL]] +// +half call_sqrt_f16(half x) { + return sqrt(x); +} + +// Not for f16 +// CHECK-LABEL: define dso_local <2 x 
half> @call_sqrt_v2f16 +// CHECK-SAME: (<2 x half> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef [[X]]) #[[ATTR2]] +// CHECK-NEXT: ret <2 x half> [[CALL]] +// +half2 call_sqrt_v2f16(half2 x) { + return sqrt(x); +} +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// CORRECTLYROUNDED: {{.*}} +// DEFAULT: {{.*}}