diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -1082,9 +1082,12 @@ case Intrinsic::amdgcn_flat_atomic_fadd: case Intrinsic::amdgcn_flat_atomic_fmax: case Intrinsic::amdgcn_flat_atomic_fmin: { - Module *M = II->getParent()->getParent()->getParent(); Type *DestTy = II->getType(); Type *SrcTy = NewV->getType(); + unsigned NewAS = SrcTy->getPointerAddressSpace(); + if (!AMDGPU::isExtendedGlobalAddrSpace(NewAS)) + return nullptr; + Module *M = II->getModule(); Function *NewDecl = Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy, DestTy}); II->setArgOperand(0, NewV); diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat-fadd-fmin-fmax-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat-fadd-fmin-fmax-intrinsics.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat-fadd-fmin-fmax-intrinsics.ll @@ -0,0 +1,258 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s + +declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data) +declare float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %ptr, float %data) +declare float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %ptr, float %data) + +define amdgpu_kernel void @flat_atomic_fadd_f32_p1(ptr addrspace(1) %ptr, float %data) { +; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p1 +; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]], float [[DATA:%.*]]) { +; CHECK-NEXT: [[ADD:%.*]] = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1) [[PTR]], float [[DATA]]) +; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p1.f32(ptr addrspace(1) [[PTR]], float [[DATA]]) +; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p1.f32(ptr addrspace(1) [[PTR]], float [[DATA]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(1) %ptr to ptr + %add = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %cast, float %data) + %max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data) + %min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data) + ret void +} + +define amdgpu_kernel void @flat_atomic_fadd_f32_p2(ptr addrspace(2) %ptr, float %data) { +; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p2 +; CHECK-SAME: (ptr addrspace(2) [[PTR:%.*]], float [[DATA:%.*]]) { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr +; CHECK-NEXT: [[ADD:%.*]] = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr [[CAST]], float [[DATA]]) +; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr [[CAST]], float [[DATA]]) +; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr [[CAST]], float [[DATA]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(2) %ptr to ptr + %add = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %cast, float %data) + %max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data) + %min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data) + ret void +} + +define amdgpu_kernel void @flat_atomic_fadd_f32_p3(ptr addrspace(3) %ptr, float %data) { +; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p3 +; CHECK-SAME: (ptr addrspace(3) [[PTR:%.*]], float [[DATA:%.*]]) { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr +; CHECK-NEXT: [[ADD:%.*]] = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr [[CAST]], float [[DATA]]) +; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr [[CAST]], float [[DATA]]) +; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr [[CAST]], float [[DATA]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(3) %ptr to ptr + %add = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %cast, float %data) + %max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data) + %min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data) + ret void +} + +define amdgpu_kernel void @flat_atomic_fadd_f32_p4(ptr addrspace(4) %ptr, float %data) { +; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p4 +; CHECK-SAME: (ptr addrspace(4) [[PTR:%.*]], float [[DATA:%.*]]) { +; CHECK-NEXT: [[ADD:%.*]] = call float @llvm.amdgcn.flat.atomic.fadd.f32.p4.f32(ptr addrspace(4) [[PTR]], float [[DATA]]) +; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p4.f32(ptr addrspace(4) [[PTR]], float [[DATA]]) +; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p4.f32(ptr addrspace(4) [[PTR]], float [[DATA]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(4) %ptr to ptr + %add = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %cast, float %data) + %max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data) + %min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data) + ret void +} + +define amdgpu_kernel void @flat_atomic_fadd_f32_p5(ptr addrspace(5) %ptr, float %data) { +; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p5 +; CHECK-SAME: (ptr addrspace(5) [[PTR:%.*]], float [[DATA:%.*]]) { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr +; CHECK-NEXT: [[ADD:%.*]] = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr [[CAST]], float [[DATA]]) +; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr [[CAST]], float [[DATA]]) +; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr [[CAST]], float [[DATA]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(5) %ptr to ptr + %add = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %cast, float %data) + %max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data) + %min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data) + ret void +} + +define amdgpu_kernel void @flat_atomic_fadd_f32_p6(ptr addrspace(6) %ptr, float %data) { +; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p6 +; CHECK-SAME: (ptr addrspace(6) [[PTR:%.*]], float [[DATA:%.*]]) { +; CHECK-NEXT: [[ADD:%.*]] = call float @llvm.amdgcn.flat.atomic.fadd.f32.p6.f32(ptr addrspace(6) [[PTR]], float [[DATA]]) +; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p6.f32(ptr addrspace(6) [[PTR]], float [[DATA]]) +; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p6.f32(ptr addrspace(6) [[PTR]], float [[DATA]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(6) %ptr to ptr + %add = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %cast, float %data) + %max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data) + %min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data) + ret void +} + +define amdgpu_kernel void @flat_atomic_fadd_f32_p7(ptr addrspace(7) %ptr, float %data) { +; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p7 +; CHECK-SAME: (ptr addrspace(7) [[PTR:%.*]], float [[DATA:%.*]]) { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(7) [[PTR]] to ptr +; CHECK-NEXT: [[ADD:%.*]] = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr [[CAST]], float [[DATA]]) +; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr [[CAST]], float [[DATA]]) +; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr [[CAST]], float [[DATA]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(7) %ptr to ptr + %add = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %cast, float %data) + %max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data) + %min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data) + ret void +} + +define amdgpu_kernel void @flat_atomic_fadd_f32_p99(ptr addrspace(99) %ptr, float %data) { +; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p99 +; CHECK-SAME: (ptr addrspace(99) [[PTR:%.*]], float [[DATA:%.*]]) { +; CHECK-NEXT: [[ADD:%.*]] = call float @llvm.amdgcn.flat.atomic.fadd.f32.p99.f32(ptr addrspace(99) [[PTR]], float [[DATA]]) +; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p99.f32(ptr addrspace(99) [[PTR]], float [[DATA]]) +; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p99.f32(ptr addrspace(99) [[PTR]], float [[DATA]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(99) %ptr to ptr + %add = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %cast, float %data) + %max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data) + %min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data) + ret void +} + +declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data) +declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) +declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) + +define amdgpu_kernel void @flat_atomic_fadd_f64_p1(ptr addrspace(1) %ptr, double %data) { +; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p1 +; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]], double [[DATA:%.*]]) { +; CHECK-NEXT: [[ADD:%.*]] = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) [[PTR]], double [[DATA]]) +; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p1.f64(ptr addrspace(1) [[PTR]], double [[DATA]]) +; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p1.f64(ptr addrspace(1) [[PTR]], double [[DATA]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(1) %ptr to ptr + %add = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %cast, double %data) + %max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data) + %min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data) + ret void +} + +define amdgpu_kernel void @flat_atomic_fadd_f64_p2(ptr addrspace(2) %ptr, double %data) { +; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p2 +; CHECK-SAME: (ptr addrspace(2) [[PTR:%.*]], double [[DATA:%.*]]) { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr +; CHECK-NEXT: [[ADD:%.*]] = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr [[CAST]], double [[DATA]]) +; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr [[CAST]], double [[DATA]]) +; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr [[CAST]], double [[DATA]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(2) %ptr to ptr + %add = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %cast, double %data) + %max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data) + %min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data) + ret void +} + +define amdgpu_kernel void @flat_atomic_fadd_f64_p3(ptr addrspace(3) %ptr, double %data) { +; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p3 +; CHECK-SAME: (ptr addrspace(3) [[PTR:%.*]], double [[DATA:%.*]]) { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr +; CHECK-NEXT: [[ADD:%.*]] = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr [[CAST]], double [[DATA]]) +; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr [[CAST]], double [[DATA]]) +; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr [[CAST]], double [[DATA]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(3) %ptr to ptr + %add = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %cast, double %data) + %max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data) + %min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data) + ret void +} + +define amdgpu_kernel void @flat_atomic_fadd_f64_p4(ptr addrspace(4) %ptr, double %data) { +; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p4 +; CHECK-SAME: (ptr addrspace(4) [[PTR:%.*]], double [[DATA:%.*]]) { +; CHECK-NEXT: [[ADD:%.*]] = call double @llvm.amdgcn.flat.atomic.fadd.f64.p4.f64(ptr addrspace(4) [[PTR]], double [[DATA]]) +; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p4.f64(ptr addrspace(4) [[PTR]], double [[DATA]]) +; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p4.f64(ptr addrspace(4) [[PTR]], double [[DATA]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(4) %ptr to ptr + %add = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %cast, double %data) + %max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data) + %min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data) + ret void +} + +define amdgpu_kernel void @flat_atomic_fadd_f64_p5(ptr addrspace(5) %ptr, double %data) { +; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p5 +; CHECK-SAME: (ptr addrspace(5) [[PTR:%.*]], double [[DATA:%.*]]) { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr +; CHECK-NEXT: [[ADD:%.*]] = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr [[CAST]], double [[DATA]]) +; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr [[CAST]], double [[DATA]]) +; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr [[CAST]], double [[DATA]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(5) %ptr to ptr + %add = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %cast, double %data) + %max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data) + %min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data) + ret void +} + +define amdgpu_kernel void @flat_atomic_fadd_f64_p6(ptr addrspace(6) %ptr, double %data) { +; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p6 +; CHECK-SAME: (ptr addrspace(6) [[PTR:%.*]], double [[DATA:%.*]]) { +; CHECK-NEXT: [[ADD:%.*]] = call double @llvm.amdgcn.flat.atomic.fadd.f64.p6.f64(ptr addrspace(6) [[PTR]], double [[DATA]]) +; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p6.f64(ptr addrspace(6) [[PTR]], double [[DATA]]) +; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p6.f64(ptr addrspace(6) [[PTR]], double [[DATA]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(6) %ptr to ptr + %add = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %cast, double %data) + %max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data) + %min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data) + ret void +} + +define amdgpu_kernel void @flat_atomic_fadd_f64_p7(ptr addrspace(7) %ptr, double %data) { +; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p7 +; CHECK-SAME: (ptr addrspace(7) [[PTR:%.*]], double [[DATA:%.*]]) { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(7) [[PTR]] to ptr +; CHECK-NEXT: [[ADD:%.*]] = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr [[CAST]], double [[DATA]]) +; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr [[CAST]], double [[DATA]]) +; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr [[CAST]], double [[DATA]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(7) %ptr to ptr + %add = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %cast, double %data) + %max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data) + %min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data) + ret void +} + +define amdgpu_kernel void @flat_atomic_fadd_f64_p99(ptr addrspace(99) %ptr, double %data) { +; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p99 +; CHECK-SAME: (ptr addrspace(99) [[PTR:%.*]], double [[DATA:%.*]]) { +; CHECK-NEXT: [[ADD:%.*]] = call double @llvm.amdgcn.flat.atomic.fadd.f64.p99.f64(ptr addrspace(99) [[PTR]], double [[DATA]]) +; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p99.f64(ptr addrspace(99) [[PTR]], double [[DATA]]) +; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p99.f64(ptr addrspace(99) [[PTR]], double [[DATA]]) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(99) %ptr to ptr + %add = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %cast, double %data) + %max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data) + %min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data) + ret void +}