Index: llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -34,6 +34,8 @@
 Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
   const MachineInstr *MI = MRI.getVRegDef(R);
   switch (MI->getOpcode()) {
+  case TargetOpcode::COPY:
+    return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
   case TargetOpcode::G_FRAME_INDEX: {
     int FrameIdx = MI->getOperand(1).getIndex();
     return MF.getFrameInfo().getObjectAlign(FrameIdx);
Index: llvm/lib/Target/AMDGPU/SIISelLowering.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -408,6 +408,9 @@
                                      KnownBits &Known,
                                      const MachineFunction &MF) const override;
 
+  Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R,
+                                        const MachineRegisterInfo &MRI,
+                                        unsigned Depth = 0) const override;
   bool isSDNodeSourceOfDivergence(const SDNode *N,
                                   FunctionLoweringInfo *FLI,
                                   LegacyDivergenceAnalysis *DA) const override;
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -35,6 +35,7 @@
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/DAGCombine.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -11217,6 +11218,27 @@
 Known.Zero.setHighBits(getSubtarget()->getKnownHighZeroBitsForFrameIndex());
 }
 
+Align SITargetLowering::computeKnownAlignForTargetInstr(
+  GISelKnownBits &KB, Register R, const MachineRegisterInfo &MRI,
+  unsigned Depth) const {
+  const MachineInstr *MI = MRI.getVRegDef(R);
+  switch (MI->getOpcode()) {
+  case AMDGPU::G_INTRINSIC:
+  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
+    // FIXME: Can this move to generic code? What about the case where the call
+    // site specifies a lower alignment?
+    Intrinsic::ID IID = MI->getIntrinsicID();
+    LLVMContext &Ctx = KB.getMachineFunction().getFunction().getContext();
+    AttributeList Attrs = Intrinsic::getAttributes(Ctx, IID);
+    if (MaybeAlign RetAlign = Attrs.getRetAlignment())
+      return *RetAlign;
+    return Align(1);
+  }
+  default:
+    return Align(1);
+  }
+}
+
 Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
   const Align PrefAlign = TargetLowering::getPrefLoopAlignment(ML);
   const Align CacheLineAlign = Align(64);
Index: llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp
===================================================================
--- llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp
+++ llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp
@@ -431,3 +431,35 @@
   EXPECT_EQ(16u, Info.computeNumSignBits(CopyLoadUShort));
   EXPECT_EQ(17u, Info.computeNumSignBits(CopyLoadSShort));
 }
+
+TEST_F(AMDGPUGISelMITest, TestTargetKnownAlign) {
+  StringRef MIRString =
+    " %5:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.dispatch.ptr)\n"
+    " %6:_(p4) = COPY %5\n"
+    " %7:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.queue.ptr)\n"
+    " %8:_(p4) = COPY %7\n"
+    " %9:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)\n"
+    " %10:_(p4) = COPY %9\n"
+    " %11:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.implicitarg.ptr)\n"
+    " %12:_(p4) = COPY %11\n"
+    " %13:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.implicit.buffer.ptr)\n"
+    " %14:_(p4) = COPY %13\n";
+
+  setUp(MIRString);
+  if (!TM)
+    return;
+
+  Register CopyDispatchPtr = Copies[Copies.size() - 5];
+  Register CopyQueuePtr = Copies[Copies.size() - 4];
+  Register CopyKernargSegmentPtr = Copies[Copies.size() - 3];
+  Register CopyImplicitArgPtr = Copies[Copies.size() - 2];
+  Register CopyImplicitBufferPtr = Copies[Copies.size() - 1];
+
+  GISelKnownBits Info(*MF);
+
+  EXPECT_EQ(Align(4), Info.computeKnownAlignment(CopyDispatchPtr));
+  EXPECT_EQ(Align(4), Info.computeKnownAlignment(CopyQueuePtr));
+  EXPECT_EQ(Align(4), Info.computeKnownAlignment(CopyKernargSegmentPtr));
+  EXPECT_EQ(Align(4), Info.computeKnownAlignment(CopyImplicitArgPtr));
+  EXPECT_EQ(Align(4), Info.computeKnownAlignment(CopyImplicitBufferPtr));
+}