Index: llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h +++ llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h @@ -69,18 +69,14 @@ /// predicate to simplify operations downstream. bool signBitIsZero(Register Op); - // FIXME: Is this the right place for G_FRAME_INDEX? Should it be in - // TargetLowering? - void computeKnownBitsForFrameIndex(Register R, KnownBits &Known, - const APInt &DemandedElts, - unsigned Depth = 0); - static Align inferAlignmentForFrameIdx(int FrameIdx, int Offset, - const MachineFunction &MF); static void computeKnownBitsForAlignment(KnownBits &Known, - MaybeAlign Alignment); + Align Alignment) { + // The low bits are known zero if the pointer is aligned. + Known.Zero.setLowBits(Log2(Alignment)); + } - // Try to infer alignment for MI. - static Align inferPtrAlignment(const MachineInstr &MI); + /// Return the known alignment for the pointer-like value \p R. + Align computeKnownAlignment(Register R, unsigned Depth = 0); // Observer API. No-op for non-caching implementation. void erasingInstr(MachineInstr &MI) override{}; Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -3341,6 +3341,15 @@ const MachineRegisterInfo &MRI, unsigned Depth = 0) const; + /// Determine the known alignment for the pointer value \p R. This can + /// typically be inferred from the number of low known 0 bits. However, for a + /// pointer with a non-integral address space, the alignment value may be + /// independent from the known low bits. + virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, + Register R, + const MachineRegisterInfo &MRI, + unsigned Depth = 0) const; + /// Determine which of the bits of FrameIndex \p FIOp are known to be 0. 
/// Default implementation computes low bits based on alignment /// information. This should preserve known bits passed into it. Index: llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -31,36 +31,22 @@ : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()), DL(MF.getFunction().getParent()->getDataLayout()), MaxDepth(MaxDepth) {} -Align GISelKnownBits::inferAlignmentForFrameIdx(int FrameIdx, int Offset, - const MachineFunction &MF) { - const MachineFrameInfo &MFI = MF.getFrameInfo(); - return commonAlignment(MFI.getObjectAlign(FrameIdx), Offset); - // TODO: How to handle cases with Base + Offset? -} - -Align GISelKnownBits::inferPtrAlignment(const MachineInstr &MI) { - if (MI.getOpcode() == TargetOpcode::G_FRAME_INDEX) { - int FrameIdx = MI.getOperand(1).getIndex(); - return inferAlignmentForFrameIdx(FrameIdx, 0, *MI.getMF()); +Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) { + const MachineInstr *MI = MRI.getVRegDef(R); + switch (MI->getOpcode()) { + case TargetOpcode::G_FRAME_INDEX: { + int FrameIdx = MI->getOperand(1).getIndex(); + return MF.getFrameInfo().getObjectAlign(FrameIdx); + } + case TargetOpcode::G_INTRINSIC: + case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: + default: + return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1); } - return Align(1); -} - -void GISelKnownBits::computeKnownBitsForFrameIndex(Register R, KnownBits &Known, - const APInt &DemandedElts, - unsigned Depth) { - const MachineInstr &MI = *MRI.getVRegDef(R); - computeKnownBitsForAlignment(Known, inferPtrAlignment(MI)); -} - -void GISelKnownBits::computeKnownBitsForAlignment(KnownBits &Known, - MaybeAlign Alignment) { - if (Alignment) - // The low bits are known zero if the pointer is aligned. 
- Known.Zero.setLowBits(Log2(*Alignment)); } KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) { + assert(MI.getNumExplicitDefs() == 1); return getKnownBits(MI.getOperand(0).getReg()); } @@ -215,7 +201,8 @@ break; } case TargetOpcode::G_FRAME_INDEX: { - computeKnownBitsForFrameIndex(R, Known, DemandedElts); + int FrameIdx = MI.getOperand(1).getIndex(); + TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF); break; } case TargetOpcode::G_SUB: { Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2832,6 +2832,12 @@ Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx))); } +Align TargetLowering::computeKnownAlignForTargetInstr( + GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, + unsigned Depth) const { + return Align(1); +} + /// This method can be implemented by targets that want to expose additional /// information about sign bits to the DAG Combiner. 
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir @@ -793,10 +793,7 @@ bb.0: ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_4095 - ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec - ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir @@ -205,11 +205,8 @@ bb.0: ; GFX6-LABEL: name: function_store_private_s32_to_1_fi_offset_4095 - ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec - ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], 
[[V_MOV_B32_e32_1]], 0, implicit $exec - ; GFX6: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_2]], %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_1_fi_offset_4095 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) @@ -477,11 +474,8 @@ ; GFX6-LABEL: name: kernel_store_private_s32_to_1_fi_offset_4095 ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec - ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec - ; GFX6: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_2]], %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) ; GFX9-LABEL: name: kernel_store_private_s32_to_1_fi_offset_4095 ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec