Diff 519771

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Show First 20 Lines • Show All 1,075 Lines • ▼ Show 20 Lines	case Intrinsic::ptrmask: {
}		}

return B.CreateIntrinsic(Intrinsic::ptrmask, {NewV->getType(), MaskTy},		return B.CreateIntrinsic(Intrinsic::ptrmask, {NewV->getType(), MaskTy},
{NewV, MaskOp});		{NewV, MaskOp});
}		}
case Intrinsic::amdgcn_flat_atomic_fadd:		case Intrinsic::amdgcn_flat_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fmax:		case Intrinsic::amdgcn_flat_atomic_fmax:
case Intrinsic::amdgcn_flat_atomic_fmin: {		case Intrinsic::amdgcn_flat_atomic_fmin: {
		unsigned OldAS = OldV->getType()->getPointerAddressSpace();
		unsigned NewAS = NewV->getType()->getPointerAddressSpace();
		const GCNTargetMachine &TM =
		static_cast<const GCNTargetMachine &>(getTLI()->getTargetMachine());
		if (!TM.isNoopAddrSpaceCast(OldAS, NewAS))
		return nullptr;
		arsenm: But the not-noop address space casts are the cases we're interested in handling. Do you mean to check for valid casts?
		jmmartinez (author): The problem is that we currently cannot lower this builtin for address spaces other than flat. I agree that the rewrite loses almost all of its interest for these intrinsics. Maybe we shouldn't rewrite them at all, which would simplify the code. Handling CONSTANT_ADDRESS_32BIT gets awkward since it's not handled in isFlatGlobalAddrSpace. I modified isFlatGlobalAddrSpace to take it into account, but I got several failures in other tests (I haven't looked at the details yet).
		jmmartinez (author): Ping
Module *M = II->getParent()->getParent()->getParent();		Module *M = II->getParent()->getParent()->getParent();
Type *DestTy = II->getType();		Type *DestTy = II->getType();
Type *SrcTy = NewV->getType();		Type *SrcTy = NewV->getType();
Function *NewDecl = Intrinsic::getDeclaration(M, II->getIntrinsicID(),		Function *NewDecl = Intrinsic::getDeclaration(M, II->getIntrinsicID(),
		arsenm: Is this not just isExtendedGlobalAddrSpace?
{DestTy, SrcTy, DestTy});		{DestTy, SrcTy, DestTy});
II->setArgOperand(0, NewV);		II->setArgOperand(0, NewV);
II->setCalledFunction(NewDecl);		II->setCalledFunction(NewDecl);
return II;		return II;
		arsenm: I don't understand why you are checking both address spaces. The old address space had to have been flat?
		jmmartinez (author): You're right. I fixed the code to match.
}		}
default:		default:
return nullptr;		return nullptr;
}		}
}		}

InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,		InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *VT, ArrayRef<int> Mask,		VectorType *VT, ArrayRef<int> Mask,
▲ Show 20 Lines • Show All 172 Lines • Show Last 20 Lines

llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat-fadd-fmin-fmax-intrinsics.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
				; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s

				declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data)
				arsenm: Can you precommit the test to show the diff?
				jmmartinez (author): I've added https://reviews.llvm.org/D150259 as a parent patch.
				declare float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %ptr, float %data)
				declare float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %ptr, float %data)

				define amdgpu_kernel void @flat_atomic_fadd_f32_p1(ptr addrspace(1) %ptr, float %data) {
				; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p1
				; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]], float [[DATA:%.*]]) {
				; CHECK-NEXT: [[ADD:%.*]] = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1) [[PTR]], float [[DATA]])
				; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p1.f32(ptr addrspace(1) [[PTR]], float [[DATA]])
				; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p1.f32(ptr addrspace(1) [[PTR]], float [[DATA]])
				; CHECK-NEXT: ret void
				;
				%cast = addrspacecast ptr addrspace(1) %ptr to ptr
				%add = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %cast, float %data)
				%max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data)
				%min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data)
				ret void
				}

				define amdgpu_kernel void @flat_atomic_fadd_f32_p2(ptr addrspace(2) %ptr, float %data) {
				; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p2
				; CHECK-SAME: (ptr addrspace(2) [[PTR:%.*]], float [[DATA:%.*]]) {
				; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr
				; CHECK-NEXT: [[ADD:%.*]] = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr [[CAST]], float [[DATA]])
				; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr [[CAST]], float [[DATA]])
				; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr [[CAST]], float [[DATA]])
				; CHECK-NEXT: ret void
				;
				%cast = addrspacecast ptr addrspace(2) %ptr to ptr
				%add = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %cast, float %data)
				%max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data)
				%min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data)
				ret void
				}

				define amdgpu_kernel void @flat_atomic_fadd_f32_p3(ptr addrspace(3) %ptr, float %data) {
				; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p3
				; CHECK-SAME: (ptr addrspace(3) [[PTR:%.*]], float [[DATA:%.*]]) {
				; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
				; CHECK-NEXT: [[ADD:%.*]] = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr [[CAST]], float [[DATA]])
				; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr [[CAST]], float [[DATA]])
				; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr [[CAST]], float [[DATA]])
				; CHECK-NEXT: ret void
				;
				%cast = addrspacecast ptr addrspace(3) %ptr to ptr
				%add = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %cast, float %data)
				%max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data)
				%min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data)
				ret void
				}

				define amdgpu_kernel void @flat_atomic_fadd_f32_p4(ptr addrspace(4) %ptr, float %data) {
				; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p4
				; CHECK-SAME: (ptr addrspace(4) [[PTR:%.*]], float [[DATA:%.*]]) {
				; CHECK-NEXT: [[ADD:%.*]] = call float @llvm.amdgcn.flat.atomic.fadd.f32.p4.f32(ptr addrspace(4) [[PTR]], float [[DATA]])
				; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p4.f32(ptr addrspace(4) [[PTR]], float [[DATA]])
				; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p4.f32(ptr addrspace(4) [[PTR]], float [[DATA]])
				; CHECK-NEXT: ret void
				;
				%cast = addrspacecast ptr addrspace(4) %ptr to ptr
				%add = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %cast, float %data)
				%max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data)
				%min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data)
				ret void
				}

				define amdgpu_kernel void @flat_atomic_fadd_f32_p5(ptr addrspace(5) %ptr, float %data) {
				; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f32_p5
				; CHECK-SAME: (ptr addrspace(5) [[PTR:%.*]], float [[DATA:%.*]]) {
				; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
				; CHECK-NEXT: [[ADD:%.*]] = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr [[CAST]], float [[DATA]])
				; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr [[CAST]], float [[DATA]])
				; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr [[CAST]], float [[DATA]])
				; CHECK-NEXT: ret void
				;
				%cast = addrspacecast ptr addrspace(5) %ptr to ptr
				%add = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %cast, float %data)
				%max = call float @llvm.amdgcn.flat.atomic.fmax.f32.p0.f32(ptr %cast, float %data)
				%min = call float @llvm.amdgcn.flat.atomic.fmin.f32.p0.f32(ptr %cast, float %data)
				ret void
				}

				declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data)
				declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data)
				declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data)

				define amdgpu_kernel void @flat_atomic_fadd_f64_p1(ptr addrspace(1) %ptr, double %data) {
				; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p1
				; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]], double [[DATA:%.*]]) {
				; CHECK-NEXT: [[ADD:%.*]] = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) [[PTR]], double [[DATA]])
				; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p1.f64(ptr addrspace(1) [[PTR]], double [[DATA]])
				; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p1.f64(ptr addrspace(1) [[PTR]], double [[DATA]])
				; CHECK-NEXT: ret void
				;
				%cast = addrspacecast ptr addrspace(1) %ptr to ptr
				%add = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %cast, double %data)
				%max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data)
				%min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data)
				ret void
				}

				define amdgpu_kernel void @flat_atomic_fadd_f64_p2(ptr addrspace(2) %ptr, double %data) {
				; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p2
				; CHECK-SAME: (ptr addrspace(2) [[PTR:%.*]], double [[DATA:%.*]]) {
				; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(2) [[PTR]] to ptr
				; CHECK-NEXT: [[ADD:%.*]] = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr [[CAST]], double [[DATA]])
				; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr [[CAST]], double [[DATA]])
				; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr [[CAST]], double [[DATA]])
				; CHECK-NEXT: ret void
				;
				%cast = addrspacecast ptr addrspace(2) %ptr to ptr
				%add = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %cast, double %data)
				%max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data)
				%min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data)
				ret void
				}

				define amdgpu_kernel void @flat_atomic_fadd_f64_p3(ptr addrspace(3) %ptr, double %data) {
				; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p3
				; CHECK-SAME: (ptr addrspace(3) [[PTR:%.*]], double [[DATA:%.*]]) {
				; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
				; CHECK-NEXT: [[ADD:%.*]] = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr [[CAST]], double [[DATA]])
				; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr [[CAST]], double [[DATA]])
				; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr [[CAST]], double [[DATA]])
				; CHECK-NEXT: ret void
				;
				%cast = addrspacecast ptr addrspace(3) %ptr to ptr
				%add = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %cast, double %data)
				%max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data)
				%min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data)
				ret void
				}

				define amdgpu_kernel void @flat_atomic_fadd_f64_p4(ptr addrspace(4) %ptr, double %data) {
				; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p4
				; CHECK-SAME: (ptr addrspace(4) [[PTR:%.*]], double [[DATA:%.*]]) {
				; CHECK-NEXT: [[ADD:%.*]] = call double @llvm.amdgcn.flat.atomic.fadd.f64.p4.f64(ptr addrspace(4) [[PTR]], double [[DATA]])
				; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p4.f64(ptr addrspace(4) [[PTR]], double [[DATA]])
				; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p4.f64(ptr addrspace(4) [[PTR]], double [[DATA]])
				; CHECK-NEXT: ret void
				;
				%cast = addrspacecast ptr addrspace(4) %ptr to ptr
				%add = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %cast, double %data)
				%max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data)
				%min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data)
				ret void
				}

				define amdgpu_kernel void @flat_atomic_fadd_f64_p5(ptr addrspace(5) %ptr, double %data) {
				; CHECK-LABEL: define amdgpu_kernel void @flat_atomic_fadd_f64_p5
				; CHECK-SAME: (ptr addrspace(5) [[PTR:%.*]], double [[DATA:%.*]]) {
				; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
				; CHECK-NEXT: [[ADD:%.*]] = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr [[CAST]], double [[DATA]])
				; CHECK-NEXT: [[MAX:%.*]] = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr [[CAST]], double [[DATA]])
				; CHECK-NEXT: [[MIN:%.*]] = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr [[CAST]], double [[DATA]])
				; CHECK-NEXT: ret void
				;
				%cast = addrspacecast ptr addrspace(5) %ptr to ptr
				%add = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %cast, double %data)
				%max = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %cast, double %data)
				%min = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %cast, double %data)
				ret void
				}
				arsenm: Add a test with a random number; other unknown address spaces should generally act like global. Also test the new fat pointer address spaces?
				arsenm: Also constant 32 bit.

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU][InferAddressSpaces] Only rewrite address-spaces that can be trivially casted to flat for llvm.amdgcn.flat.atomic.{fadd,fmax,fmin} (2/2)
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 519771

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat-fadd-fmin-fmax-intrinsics.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU][InferAddressSpaces] Only rewrite address-spaces that can be trivially casted to flat for llvm.amdgcn.flat.atomic.{fadd,fmax,fmin} (2/2)ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 519771

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat-fadd-fmin-fmax-intrinsics.ll

[AMDGPU][InferAddressSpaces] Only rewrite address-spaces that can be trivially casted to flat for llvm.amdgcn.flat.atomic.{fadd,fmax,fmin} (2/2)
ClosedPublic