Index: lib/Target/AMDGPU/SIISelLowering.h
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.h
+++ lib/Target/AMDGPU/SIISelLowering.h
@@ -117,11 +117,15 @@
 
   const SISubtarget *getSubtarget() const;
 
+  bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
+                          EVT /*VT*/) const override;
+
   bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
                           unsigned IntrinsicID) const override;
 
-  bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
-                          EVT /*VT*/) const override;
+  bool GetAddrModeArguments(IntrinsicInst * /*I*/,
+                            SmallVectorImpl<Value*> &/*Ops*/,
+                            Type *&/*AccessTy*/) const override;
 
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                              unsigned AS) const override;
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -60,6 +60,7 @@
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CodeGen.h"
@@ -417,6 +418,13 @@
 // TargetLowering queries
 //===----------------------------------------------------------------------===//
 
+bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
+                                          EVT) const {
+  // SI has some legal vector types, but no legal vector operations. Say no
+  // shuffles are legal in order to prefer scalarizing some vector operations.
+  return false;
+}
+
 bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &CI,
                                           unsigned IntrID) const {
@@ -436,11 +444,24 @@
   }
 }
 
-bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
-                                          EVT) const {
-  // SI has some legal vector types, but no legal vector operations. Say no
-  // shuffles are legal in order to prefer scalarizing some vector operations.
-  return false;
+// For the amdgcn atomic inc/dec intrinsics, report operand 0 as the pointer
+// being accessed and the call's result type as the accessed type, and return
+// true. Every other intrinsic returns false and leaves Ops/AccessTy untouched.
+// NOTE(review): presumably consumed by CodeGenPrepare address sinking - confirm.
+bool SITargetLowering::GetAddrModeArguments(IntrinsicInst *II,
+                                            SmallVectorImpl<Value*> &Ops,
+                                            Type *&AccessTy) const {
+  switch (II->getIntrinsicID()) {
+  case Intrinsic::amdgcn_atomic_inc:
+  case Intrinsic::amdgcn_atomic_dec: {
+    Value *Ptr = II->getArgOperand(0);
+    AccessTy = II->getType();
+    Ops.push_back(Ptr);
+    return true;
+  }
+  default:
+    return false;
+  }
 }
 
 bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
Index: test/CodeGen/AMDGPU/cgp-addressing-modes.ll
===================================================================
--- test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -581,7 +581,62 @@
   ret void
 }
 
+; OPT-LABEL: @test_sink_local_small_offset_atomic_inc_i32(
+; OPT: %sunkaddr = ptrtoint i32 addrspace(3)* %in to i32
+; OPT: %sunkaddr1 = add i32 %sunkaddr, 28
+; OPT: %sunkaddr2 = inttoptr i32 %sunkaddr1 to i32 addrspace(3)*
+; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %sunkaddr2, i32 2)
+define void @test_sink_local_small_offset_atomic_inc_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
+entry:
+  %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
+  %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
+  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+  %tmp0 = icmp eq i32 %tid, 0
+  br i1 %tmp0, label %endif, label %if
+
+if:
+  %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2)
+  br label %endif
+
+endif:
+  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+  store i32 %x, i32 addrspace(3)* %out.gep
+  br label %done
+
+done:
+  ret void
+}
+
+; OPT-LABEL: @test_sink_local_small_offset_atomic_dec_i32(
+; OPT: %sunkaddr = ptrtoint i32 addrspace(3)* %in to i32
+; OPT: %sunkaddr1 = add i32 %sunkaddr, 28
+; OPT: %sunkaddr2 = inttoptr i32 %sunkaddr1 to i32 addrspace(3)*
+; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %sunkaddr2, i32 2)
+define void @test_sink_local_small_offset_atomic_dec_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
+entry:
+  %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
+  %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
+  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+  %tmp0 = icmp eq i32 %tid, 0
+  br i1 %tmp0, label %endif, label %if
+
+if:
+  %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2)
+  br label %endif
+
+endif:
+  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+  store i32 %x, i32 addrspace(3)* %out.gep
+  br label %done
+
+done:
+  ret void
+}
+
 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
+declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
+declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
+attributes #2 = { nounwind argmemonly }