Index: include/llvm/CodeGen/GlobalISel/InstructionSelector.h
===================================================================
--- include/llvm/CodeGen/GlobalISel/InstructionSelector.h
+++ include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -341,6 +341,12 @@
   bool isOperandImmEqual(const MachineOperand &MO, int64_t Value,
                          const MachineRegisterInfo &MRI) const;
 
+  /// Return true if the specified operand is a G_GEP with a G_CONSTANT on the
+  /// right-hand side. GlobalISel's separation of pointer and integer types
+  /// means that we don't need to worry about G_OR with equivalent semantics.
+  bool isBaseWithConstantOffset(const MachineOperand &Root,
+                                const MachineRegisterInfo &MRI) const;
+
   bool isObviouslySafeToFold(MachineInstr &MI) const;
 };
Index: include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
===================================================================
--- include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
+++ include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
@@ -248,10 +248,18 @@
       int64_t InsnID = MatchTable[CurrentIdx++];
       int64_t OpIdx = MatchTable[CurrentIdx++];
       int64_t SizeInBits = MatchTable[CurrentIdx++];
+      DEBUG(dbgs() << CurrentIdx << ": GIM_CheckPointerToAny(MIs[" << InsnID << "]->getOperand(" << OpIdx << "), SizeInBits=" << SizeInBits << ")\n");
       assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
+
+      // iPTR must be looked up in the target.
+      if (SizeInBits == 0)
+        SizeInBits = State.MIs[InsnID]->getParent()->getParent()->getDataLayout().getPointerSizeInBits(0);
+
+      assert(SizeInBits != 0 && "Pointer size must be known");
+
       const LLT &Ty = MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg());
       if (!Ty.isPointer() || Ty.getSizeInBits() != SizeInBits) {
         if (handleReject() == RejectAndGiveUp)
Index: lib/CodeGen/GlobalISel/InstructionSelector.cpp
===================================================================
--- lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -18,6 +18,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -98,6 +99,23 @@
   return false;
 }
 
+bool InstructionSelector::isBaseWithConstantOffset(
+    const MachineOperand &Root, const MachineRegisterInfo &MRI) const {
+  if (!Root.isReg())
+    return false;
+
+  MachineInstr *RootI = MRI.getVRegDef(Root.getReg());
+  if (RootI->getOpcode() != TargetOpcode::G_GEP)
+    return false;
+
+  MachineOperand &RHS = RootI->getOperand(2);
+  MachineInstr *RHSI = MRI.getVRegDef(RHS.getReg());
+  if (RHSI->getOpcode() != TargetOpcode::G_CONSTANT)
+    return false;
+
+  return true;
+}
+
 bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI) const {
   return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() &&
          MI.implicit_operands().begin() == MI.implicit_operands().end();
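For reference, a minimal stand-alone sketch (not part of the patch; it assumes address space 0, as the GIM_CheckPointerToAny handler above does) of the iPTR resolution: a SizeInBits of 0 in the match table stands for iPTR and is replaced by the target's pointer width before the operand's LLT is compared.

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/IR/DataLayout.h"
    #include <cstdint>

    // SizeInBits == 0 encodes iPTR; resolve it from the DataLayout of the
    // function being selected (address space 0 assumed).
    uint64_t resolvePointerSizeInBits(const llvm::MachineFunction &MF,
                                      uint64_t SizeInBits) {
      if (SizeInBits == 0)
        SizeInBits = MF.getDataLayout().getPointerSizeInBits(0);
      return SizeInBits;
    }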
Index: lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- lib/Target/AArch64/AArch64InstrFormats.td
+++ lib/Target/AArch64/AArch64InstrFormats.td
@@ -2516,6 +2516,22 @@
 def am_indexed64 : ComplexPattern<i64, 2, "SelectAddrModeIndexed64", []>;
 def am_indexed128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed128", []>;
 
+def gi_am_indexed8 :
+    GIComplexOperandMatcher<s64, "selectAddrModeIndexed<8>">,
+    GIComplexPatternEquiv<am_indexed8>;
+def gi_am_indexed16 :
+    GIComplexOperandMatcher<s64, "selectAddrModeIndexed<16>">,
+    GIComplexPatternEquiv<am_indexed16>;
+def gi_am_indexed32 :
+    GIComplexOperandMatcher<s64, "selectAddrModeIndexed<32>">,
+    GIComplexPatternEquiv<am_indexed32>;
+def gi_am_indexed64 :
+    GIComplexOperandMatcher<s64, "selectAddrModeIndexed<64>">,
+    GIComplexPatternEquiv<am_indexed64>;
+def gi_am_indexed128 :
+    GIComplexOperandMatcher<s64, "selectAddrModeIndexed<128>">,
+    GIComplexPatternEquiv<am_indexed128>;
+
 class UImm12OffsetOperand<int Scale> : AsmOperandClass {
   let Name = "UImm12Offset" # Scale;
   let RenderMethod = "addUImm12OffsetOperands<" # Scale # ">";
@@ -3146,6 +3162,23 @@
 def am_unscaled64 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled64", []>;
 def am_unscaled128 :ComplexPattern<i64, 2, "SelectAddrModeUnscaled128", []>;
 
+def gi_am_unscaled8 :
+    GIComplexOperandMatcher<s64, "selectAddrModeUnscaled8">,
+    GIComplexPatternEquiv<am_unscaled8>;
+def gi_am_unscaled16 :
+    GIComplexOperandMatcher<s64, "selectAddrModeUnscaled16">,
+    GIComplexPatternEquiv<am_unscaled16>;
+def gi_am_unscaled32 :
+    GIComplexOperandMatcher<s64, "selectAddrModeUnscaled32">,
+    GIComplexPatternEquiv<am_unscaled32>;
+def gi_am_unscaled64 :
+    GIComplexOperandMatcher<s64, "selectAddrModeUnscaled64">,
+    GIComplexPatternEquiv<am_unscaled64>;
+def gi_am_unscaled128 :
+    GIComplexOperandMatcher<s64, "selectAddrModeUnscaled128">,
+    GIComplexPatternEquiv<am_unscaled128>;
+
+
 class BaseLoadStoreUnscale<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops,
                            string asm, list<dag> pattern>
     : I<oops, iops, asm, "\t$Rt, [$Rn, $offset]", "", pattern> {
Index: lib/Target/AArch64/AArch64InstructionSelector.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -66,6 +66,32 @@
 
   ComplexRendererFn selectArithImmed(MachineOperand &Root) const;
 
+  ComplexRendererFn selectAddrModeUnscaled(MachineOperand &Root,
+                                           unsigned Size) const;
+
+  ComplexRendererFn selectAddrModeUnscaled8(MachineOperand &Root) const {
+    return selectAddrModeUnscaled(Root, 1);
+  }
+  ComplexRendererFn selectAddrModeUnscaled16(MachineOperand &Root) const {
+    return selectAddrModeUnscaled(Root, 2);
+  }
+  ComplexRendererFn selectAddrModeUnscaled32(MachineOperand &Root) const {
+    return selectAddrModeUnscaled(Root, 4);
+  }
+  ComplexRendererFn selectAddrModeUnscaled64(MachineOperand &Root) const {
+    return selectAddrModeUnscaled(Root, 8);
+  }
+  ComplexRendererFn selectAddrModeUnscaled128(MachineOperand &Root) const {
+    return selectAddrModeUnscaled(Root, 16);
+  }
+
+  ComplexRendererFn selectAddrModeIndexed(MachineOperand &Root,
+                                          unsigned Size) const;
+  template <int Width>
+  ComplexRendererFn selectAddrModeIndexed(MachineOperand &Root) const {
+    return selectAddrModeIndexed(Root, Width / 8);
+  }
+
   const AArch64TargetMachine &TM;
   const AArch64Subtarget &STI;
   const AArch64InstrInfo &TII;
@@ -1391,6 +1417,105 @@
   }};
 }
 
+/// Select a "register plus unscaled signed 9-bit immediate" address. This
+/// should only match when there is an offset that is not valid for a scaled
+/// immediate addressing mode. The "Size" argument is the size in bytes of the
+/// memory reference, which is needed here to know what is valid for a scaled
+/// immediate.
+InstructionSelector::ComplexRendererFn
+AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
+                                                   unsigned Size) const {
+  MachineRegisterInfo &MRI =
+      Root.getParent()->getParent()->getParent()->getRegInfo();
+
+  if (!Root.isReg())
+    return None;
+
+  if (!isBaseWithConstantOffset(Root, MRI))
+    return None;
+
+  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
+  if (!RootDef)
+    return None;
+
+  MachineOperand &OffImm = RootDef->getOperand(2);
+  if (!OffImm.isReg())
+    return None;
+  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
+  if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
+    return None;
+  int64_t RHSC;
+  MachineOperand &RHSOp1 = RHS->getOperand(1);
+  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
+    return None;
+  RHSC = RHSOp1.getCImm()->getSExtValue();
+
+  // If the offset is valid as a scaled immediate, don't match here.
+  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
+    return None;
+  if (RHSC >= -256 && RHSC < 256) {
+    MachineOperand &Base = RootDef->getOperand(1);
+    return {{
+        [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
+        [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
+    }};
+  }
+  return None;
+}
+
+/// Select a "register plus scaled unsigned 12-bit immediate" address. The
+/// "Size" argument is the size in bytes of the memory reference, which
+/// determines the scale.
+InstructionSelector::ComplexRendererFn
+AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
+                                                  unsigned Size) const {
+  MachineRegisterInfo &MRI =
+      Root.getParent()->getParent()->getParent()->getRegInfo();
+
+  if (!Root.isReg())
+    return None;
+
+  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
+  if (!RootDef)
+    return None;
+
+  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
+    return {{
+        [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
+        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
+    }};
+  }
+
+  if (isBaseWithConstantOffset(Root, MRI)) {
+    MachineOperand &LHS = RootDef->getOperand(1);
+    MachineOperand &RHS = RootDef->getOperand(2);
+    MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
+    MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
+    if (LHSDef && RHSDef) {
+      int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
+      unsigned Scale = Log2_32(Size);
+      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
+        if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
+          LHSDef = MRI.getVRegDef(LHSDef->getOperand(1).getReg());
+        return {{
+            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
+            [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
+        }};
+      }
+    }
+  }
+
+  // Before falling back to our general case, check if the unscaled
+  // instructions can handle this. If so, that's preferable.
+  if (selectAddrModeUnscaled(Root, Size).hasValue())
+    return None;
+
+  return {{
+      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
+  }};
+}
+
 namespace llvm {
 InstructionSelector *
 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
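To make the two offset tests above concrete, here is a worked example (not part of the patch) for an 8-byte access, i.e. Size == 8 and Log2_32(Size) == 3: a scaled offset must be a non-negative multiple of 8 strictly below 0x1000 << 3 == 32768, so one of {0, 8, ..., 32760}; an unscaled offset only has to lie in [-256, 255], with no alignment requirement. selectAddrModeUnscaled deliberately rejects offsets the scaled form can encode, so the two matchers never compete for the same address.

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>

    // The scaled (am_indexed*) test: aligned, non-negative, and within the
    // 12-bit scaled-immediate range.
    bool isValidScaledOffset(int64_t Offset, unsigned Size) {
      return (Offset & (Size - 1)) == 0 && Offset >= 0 &&
             Offset < (0x1000 << llvm::Log2_32(Size));
    }

    // The unscaled (am_unscaled*) test: any 9-bit signed byte offset.
    bool isValidUnscaledOffset(int64_t Offset) {
      return Offset >= -256 && Offset < 256;
    }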
Index: test/CodeGen/AArch64/GlobalISel/select-load.mir
===================================================================
--- test/CodeGen/AArch64/GlobalISel/select-load.mir
+++ test/CodeGen/AArch64/GlobalISel/select-load.mir
@@ -529,13 +529,13 @@
 # CHECK: body:
 # CHECK: %0 = COPY %x0
-# CHECK: %1 = LD1Onev2s %0
+# CHECK: %1 = LDRDui %0, 0 :: (load 8 from %ir.addr)
 # CHECK: %d0 = COPY %1
 body:             |
   bb.0:
     liveins: %x0
 
     %0(p0) = COPY %x0
-    %1(<2 x s32>) = G_LOAD %0 :: (load 4 from %ir.addr)
+    %1(<2 x s32>) = G_LOAD %0 :: (load 8 from %ir.addr)
     %d0 = COPY %1(<2 x s32>)
...
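The renderer lambdas returned by the selectAddrMode* functions above are invoked by the generated matcher when it builds the selected instruction; that is how the LDRDui in the updated test acquires its base-register and immediate operands (%0 and 0). In outline (a simplified, hypothetical helper, not the generated code):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/CodeGen/MachineInstrBuilder.h"
    #include <functional>

    // Apply each renderer in order to the instruction being built: the first
    // lambda adds the base register, the second the (possibly scaled) offset.
    void applyRenderers(
        llvm::MachineInstrBuilder &MIB,
        llvm::ArrayRef<std::function<void(llvm::MachineInstrBuilder &)>>
            Renderers) {
      for (const auto &Render : Renderers)
        Render(MIB);
    }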
Index: utils/TableGen/GlobalISelEmitter.cpp
===================================================================
--- utils/TableGen/GlobalISelEmitter.cpp
+++ utils/TableGen/GlobalISelEmitter.cpp
@@ -769,8 +769,8 @@
 /// no reliable means to derive the missing type information from the pattern so
 /// imported rules must test the components of a pointer separately.
 ///
-/// SizeInBits must be non-zero and the matched pointer must be that size.
-/// TODO: Add support for iPTR via SizeInBits==0 and a subtarget query.
+/// If SizeInBits is zero, then the pointer size will be obtained from the
+/// subtarget.
 class PointerToAnyOperandMatcher : public OperandPredicateMatcher {
 protected:
   unsigned SizeInBits;
@@ -973,9 +973,15 @@
 
   Error addTypeCheckPredicate(const TypeSetByHwMode &VTy,
                               bool OperandIsAPointer) {
-    auto OpTyOrNone = VTy.isMachineValueType()
-                          ? MVTToLLT(VTy.getMachineValueType().SimpleTy)
-                          : None;
+    if (!VTy.isMachineValueType())
+      return failedImport("unsupported typeset");
+
+    if (VTy.getMachineValueType() == MVT::iPTR && OperandIsAPointer) {
+      addPredicate<PointerToAnyOperandMatcher>(0);
+      return Error::success();
+    }
+
+    auto OpTyOrNone = MVTToLLT(VTy.getMachineValueType().SimpleTy);
     if (!OpTyOrNone)
       return failedImport("unsupported type");
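A minimal stand-alone model (hypothetical enum and flags, not the emitter's real API) of the import decision added above: an iPTR pattern operand that is known to be a pointer becomes a pointer-of-any-size check, encoded as SizeInBits == 0 and resolved by GIM_CheckPointerToAny at selection time; every other operand type still needs an exact LLT.

    enum class TypeCheckKind { PointerToAny, ExactType, Unsupported };

    TypeCheckKind classifyTypeCheck(bool IsMachineValueType, bool IsIPtr,
                                    bool OperandIsAPointer,
                                    bool HasLLTEquivalent) {
      if (!IsMachineValueType)
        return TypeCheckKind::Unsupported;  // failedImport("unsupported typeset")
      if (IsIPtr && OperandIsAPointer)
        return TypeCheckKind::PointerToAny; // addPredicate<PointerToAnyOperandMatcher>(0)
      if (!HasLLTEquivalent)
        return TypeCheckKind::Unsupported;  // failedImport("unsupported type")
      return TypeCheckKind::ExactType;
    }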