Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -398,10 +398,12 @@
   /// this target, for a load/store of the specified type.
   /// The type may be VoidTy, in which case only return true if the addressing
   /// mode is legal for a load/store of any legal type.
+  /// If the target returns true from LSRWithInstrQueries(), I may be valid.
   /// TODO: Handle pre/postinc as well.
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale,
-                             unsigned AddrSpace = 0) const;
+                             unsigned AddrSpace = 0,
+                             Instruction *I = nullptr) const;
 
   /// \brief Return true if LSR cost of C1 is lower than C2.
   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
@@ -431,6 +433,12 @@
                            bool HasBaseReg, int64_t Scale,
                            unsigned AddrSpace = 0) const;
 
+  /// \brief Return true if the loop strength reduce pass should make
+  /// Instruction* based TTI queries to isFoldableMemAccessOffset() and
+  /// isLegalAddressingMode(). This is needed on SystemZ, where e.g. a memcpy
+  /// can only have a 12 bit unsigned immediate offset and no index register.
+  bool LSRWithInstrQueries() const;
+
   /// \brief Return true if target supports the load / store
   /// instruction with the given Offset on the form reg + Offset. It
   /// may be that Offset is too big for a certain type (register
@@ -835,7 +843,8 @@
   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                      int64_t BaseOffset, bool HasBaseReg,
                                      int64_t Scale,
-                                     unsigned AddrSpace) = 0;
+                                     unsigned AddrSpace,
+                                     Instruction *I) = 0;
   virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                              TargetTransformInfo::LSRCost &C2) = 0;
   virtual bool isLegalMaskedStore(Type *DataType) = 0;
@@ -846,6 +855,7 @@
   virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                    int64_t BaseOffset, bool HasBaseReg,
                                    int64_t Scale, unsigned AddrSpace) = 0;
+  virtual bool LSRWithInstrQueries() = 0;
   virtual bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) = 0;
   virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
   virtual bool isProfitableToHoist(Instruction *I) = 0;
@@ -1026,9 +1036,10 @@
   }
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale,
-                             unsigned AddrSpace) override {
+                             unsigned AddrSpace,
+                             Instruction *I) override {
     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
-                                      Scale, AddrSpace);
+                                      Scale, AddrSpace, I);
   }
   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                      TargetTransformInfo::LSRCost &C2) override {
@@ -1055,6 +1066,9 @@
     return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                      Scale, AddrSpace);
   }
+  bool LSRWithInstrQueries() override {
+    return Impl.LSRWithInstrQueries();
+  }
   bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) override {
     return Impl.isFoldableMemAccessOffset(I, Offset);
   }
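The two hooks added above are designed to work in tandem: a pass first asks
LSRWithInstrQueries(), and only if the target opts in does it thread the user
Instruction* through isLegalAddressingMode(). A standalone sketch of that
protocol (plain C++; all names invented for illustration, not part of the
patch):

    #include <cstdint>
    #include <iostream>

    struct Instr { bool HasRestrictedAddressing; };

    struct TTI {
      // A target that needs per-instruction context opts in here.
      bool LSRWithInstrQueries() const { return true; }

      // With I available, the query can be exact for one instruction
      // instead of conservative for the whole use.
      bool isLegalAddressingMode(int64_t Scale, const Instr *I = nullptr) const {
        if (I && I->HasRestrictedAddressing)
          return Scale == 0; // e.g. no index register available
        return Scale == 0 || Scale == 1;
      }
    };

    int main() {
      TTI T;
      Instr Memcpy{true}, Load{false};
      // The pass only threads the instruction through when the target asks.
      const Instr *Ctx = T.LSRWithInstrQueries() ? &Memcpy : nullptr;
      std::cout << T.isLegalAddressingMode(1, Ctx) << '\n';   // 0: rejected
      std::cout << T.isLegalAddressingMode(1, &Load) << '\n'; // 1: accepted
    }
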
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -225,7 +225,7 @@
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale,
-                             unsigned AddrSpace) {
+                             unsigned AddrSpace, Instruction *I = nullptr) {
     // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
@@ -257,6 +257,8 @@
    return -1;
  }
 
+  bool LSRWithInstrQueries() { return false; }
+
   bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) { return true; }
 
   bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }
Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -110,13 +110,13 @@
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale,
-                             unsigned AddrSpace) {
+                             unsigned AddrSpace, Instruction *I = nullptr) {
     TargetLoweringBase::AddrMode AM;
     AM.BaseGV = BaseGV;
     AM.BaseOffs = BaseOffset;
     AM.HasBaseReg = HasBaseReg;
     AM.Scale = Scale;
-    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace);
+    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
   }
 
   bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -1870,7 +1870,8 @@
   ///
   /// TODO: Remove default argument
   virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                                     Type *Ty, unsigned AddrSpace) const;
+                                     Type *Ty, unsigned AddrSpace,
+                                     Instruction *I = nullptr) const;
 
   /// \brief Return the cost of the scaling factor used in the addressing mode
   /// represented by AM for this target, for a load/store of the specified type.
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -132,9 +132,10 @@
                                                int64_t BaseOffset,
                                                bool HasBaseReg,
                                                int64_t Scale,
-                                               unsigned AddrSpace) const {
+                                               unsigned AddrSpace,
+                                               Instruction *I) const {
   return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
-                                        Scale, AddrSpace);
+                                        Scale, AddrSpace, I);
 }
 
 bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
@@ -172,6 +173,10 @@
   return Cost;
 }
 
+bool TargetTransformInfo::LSRWithInstrQueries() const {
+  return TTIImpl->LSRWithInstrQueries();
+}
+
 bool TargetTransformInfo::isFoldableMemAccessOffset(Instruction *I,
                                                     int64_t Offset) const {
   return TTIImpl->isFoldableMemAccessOffset(I, Offset);
Index: lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- lib/CodeGen/TargetLoweringBase.cpp
+++ lib/CodeGen/TargetLoweringBase.cpp
@@ -1857,7 +1857,7 @@
 /// by AM is legal for this target, for a load/store of the specified type.
 bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
-                                               unsigned AS) const {
+                                               unsigned AS, Instruction *I) const {
   // The default implementation of this implements a conservative RISCy, r+r and
   // r+i addr mode.
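None of the default implementations above change behavior: the extra
Instruction* is accepted and ignored, and LSRWithInstrQueries() defaults to
false. For reference, the TargetTransformInfoImpl guess shown above, restated
as a self-contained predicate with examples (the helper name is hypothetical):

    #include <cassert>
    #include <cstdint>

    // Mirrors the default in TargetTransformInfoImpl.h: only "reg" and
    // "reg + reg" addressing is assumed legal.
    static bool defaultIsLegalAddressingMode(bool HasBaseGV, int64_t BaseOffset,
                                             int64_t Scale) {
      return !HasBaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
    }

    int main() {
      assert(defaultIsLegalAddressingMode(false, 0, 0));  // reg
      assert(defaultIsLegalAddressingMode(false, 0, 1));  // reg + reg
      assert(!defaultIsLegalAddressingMode(false, 8, 0)); // reg + imm rejected
      assert(!defaultIsLegalAddressingMode(false, 0, 2)); // reg + 2*reg rejected
      return 0;
    }
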
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -338,7 +338,8 @@
   /// Return true if the addressing mode represented by AM is legal for this
   /// target, for a load/store of the specified type.
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
-                             unsigned AS) const override;
+                             unsigned AS,
+                             Instruction *I = nullptr) const override;
 
   /// \brief Return the cost of the scaling factor used in the addressing
   /// mode represented by AM for this target, for a load/store
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7772,7 +7772,7 @@
 /// by AM is legal for this target, for a load/store of the specified type.
 bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                   const AddrMode &AM, Type *Ty,
-                                                  unsigned AS) const {
+                                                  unsigned AS, Instruction *I) const {
   // AArch64 has five basic addressing modes:
   //  reg
   //  reg + 9-bit signed offset
Index: lib/Target/AMDGPU/SIISelLowering.h
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.h
+++ lib/Target/AMDGPU/SIISelLowering.h
@@ -148,7 +148,8 @@
                            Type *&/*AccessTy*/) const override;
 
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
-                             unsigned AS) const override;
+                             unsigned AS,
+                             Instruction *I = nullptr) const override;
 
   bool canMergeStoresTo(unsigned AS, EVT MemVT) const override;
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -617,7 +617,7 @@
 bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
-                                             unsigned AS) const {
+                                             unsigned AS, Instruction *I) const {
   // No global is ever allowed as a base.
   if (AM.BaseGV)
     return false;
Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -317,7 +317,8 @@
     /// isLegalAddressingMode - Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
 
     /// getScalingFactorCost - Return the cost of the scaling used in
     /// addressing mode represented by AM.
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -12349,7 +12349,7 @@
 /// by AM is legal for this target, for a load/store of the specified type.
 bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                               const AddrMode &AM, Type *Ty,
-                                              unsigned AS) const {
+                                              unsigned AS, Instruction *I) const {
   EVT VT = getValueType(DL, Ty, true);
   if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
     return false;
Index: lib/Target/AVR/AVRISelLowering.h
===================================================================
--- lib/Target/AVR/AVRISelLowering.h
+++ lib/Target/AVR/AVRISelLowering.h
@@ -83,7 +83,8 @@
                       SelectionDAG &DAG) const override;
 
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
-                             unsigned AS) const override;
+                             unsigned AS,
+                             Instruction *I = nullptr) const override;
 
   bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                  ISD::MemIndexedMode &AM,
Index: lib/Target/AVR/AVRISelLowering.cpp
===================================================================
--- lib/Target/AVR/AVRISelLowering.cpp
+++ lib/Target/AVR/AVRISelLowering.cpp
@@ -724,7 +724,7 @@
 /// by AM is legal for this target, for a load/store of the specified type.
 bool AVRTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                               const AddrMode &AM, Type *Ty,
-                                              unsigned AS) const {
+                                              unsigned AS, Instruction *I) const {
   int64_t Offs = AM.BaseOffs;
 
   // Allow absolute addresses.
Index: lib/Target/Hexagon/HexagonISelLowering.h
===================================================================
--- lib/Target/Hexagon/HexagonISelLowering.h
+++ lib/Target/Hexagon/HexagonISelLowering.h
@@ -250,7 +250,8 @@
     /// mode is legal for a load/store of any legal type.
     /// TODO: Handle pre/postinc as well.
     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
     /// Return true if folding a constant offset with the given GlobalAddress
     /// is legal. It is frequently not legal in PIC relocation models.
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
Index: lib/Target/Hexagon/HexagonISelLowering.cpp
===================================================================
--- lib/Target/Hexagon/HexagonISelLowering.cpp
+++ lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3099,7 +3099,7 @@
 /// AM is legal for this target, for a load/store of the specified type.
 bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                   const AddrMode &AM, Type *Ty,
-                                                  unsigned AS) const {
+                                                  unsigned AS, Instruction *I) const {
   if (Ty->isSized()) {
     // When LSR detects uses of the same base address to access different
     // types (e.g. unions), it will assume a conservative type for these
Index: lib/Target/Mips/MipsISelLowering.h
===================================================================
--- lib/Target/Mips/MipsISelLowering.h
+++ lib/Target/Mips/MipsISelLowering.h
@@ -619,7 +619,8 @@
     }
 
     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
 
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
Index: lib/Target/Mips/MipsISelLowering.cpp
===================================================================
--- lib/Target/Mips/MipsISelLowering.cpp
+++ lib/Target/Mips/MipsISelLowering.cpp
@@ -3987,7 +3987,7 @@
 bool MipsTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
-                                               unsigned AS) const {
+                                               unsigned AS, Instruction *I) const {
   // No global is ever allowed as a base.
   if (AM.BaseGV)
     return false;
Index: lib/Target/NVPTX/NVPTXISelLowering.h
===================================================================
--- lib/Target/NVPTX/NVPTXISelLowering.h
+++ lib/Target/NVPTX/NVPTXISelLowering.h
@@ -456,7 +456,8 @@
   /// reduction (LoopStrengthReduce.cpp) and memory optimization for
   /// address mode (CodeGenPrepare.cpp)
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
-                             unsigned AS) const override;
+                             unsigned AS,
+                             Instruction *I = nullptr) const override;
 
   bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
     // Truncating 64-bit to 32-bit is free in SASS.
Index: lib/Target/NVPTX/NVPTXISelLowering.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -3793,7 +3793,7 @@
 /// (CodeGenPrepare.cpp)
 bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                 const AddrMode &AM, Type *Ty,
-                                                unsigned AS) const {
+                                                unsigned AS, Instruction *I) const {
   // AddrMode - This represents an addressing mode of:
   //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
   //
Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -723,7 +723,8 @@
     /// isLegalAddressingMode - Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
 
     /// isLegalICmpImmediate - Return true if the specified immediate is legal
     /// icmp immediate, that is the target has icmp instructions which can
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12661,7 +12661,7 @@
 // by AM is legal for this target, for a load/store of the specified type.
 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                               const AddrMode &AM, Type *Ty,
-                                              unsigned AS) const {
+                                              unsigned AS, Instruction *I) const {
   // PPC does not allow r+i addressing modes for vectors!
   if (Ty->isVectorTy() && AM.BaseOffs != 0)
     return false;
Index: lib/Target/SystemZ/SystemZISelLowering.h
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.h
+++ lib/Target/SystemZ/SystemZISelLowering.h
@@ -389,7 +389,8 @@
   bool isLegalICmpImmediate(int64_t Imm) const override;
   bool isLegalAddImmediate(int64_t Imm) const override;
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
-                             unsigned AS) const override;
+                             unsigned AS,
+                             Instruction *I = nullptr) const override;
   bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) const override;
   bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
                                       unsigned Align,
Index: lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.cpp
+++ lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -524,9 +524,57 @@
   return true;
 }
 
+// When checking for immediate offsets for a Load->Store pair, it is generally
+// beneficial to keep them small even in the non-i8 case, since it is not
+// uncommon for isel to fold adjacent load/store pairs into vector operations,
+// which also need small offsets.
+static bool hasLessAddressing(Instruction *I, bool Offsets = false) {
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    default: break;
+    case Intrinsic::memset:
+    case Intrinsic::memmove:
+    case Intrinsic::memcpy:
+      return true;
+    }
+  }
+
+  if (isa<LoadInst>(I) && I->hasOneUse()) {
+    auto *SingleUser = dyn_cast<Instruction>(*I->user_begin());
+    if (SingleUser->getParent() == I->getParent()) {
+      if (isa<ICmpInst>(SingleUser)) {
+        if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
+          if (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue()))
+            // Comparison of memory with 16 bit signed / unsigned immediate
+            return true;
+      } else if (Offsets && isa<StoreInst>(SingleUser))
+        // Load->Store: MVC
+        return true;
+    }
+  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
+    if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
+      if (Offsets && (LoadI->hasOneUse() && LoadI->getParent() == I->getParent()))
+        // Load->Store: MVC
+        return true;
+  }
+
+  return false;
+}
+
+static bool needsD12(Instruction *I) {
+  return hasLessAddressing(I, true);
+}
+
+static bool hasNoIndexReg(Instruction *I) {
+  return hasLessAddressing(I, false);
+}
+
 bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
-                                                  const AddrMode &AM, Type *Ty,
-                                                  unsigned AS) const {
+                                                  const AddrMode &AM, Type *Ty,
+                                                  unsigned AS,
+                                                  Instruction *I) const {
+
+  if (AM.Scale != 0 && I != nullptr && hasNoIndexReg(I))
+    return false;
+
   // Punt on globals for now, although they can be used in limited
   // RELATIVE LONG cases.
   if (AM.BaseGV)
@@ -542,6 +590,12 @@
 
 bool SystemZTargetLowering::isFoldableMemAccessOffset(Instruction *I,
                                                       int64_t Offset) const {
+  if (needsD12(I))
+    return isUInt<12>(Offset);
+
+  if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
+    return true;
+
   // This only applies to z13.
   if (!Subtarget.hasVector())
     return true;
@@ -552,7 +606,6 @@
   // values (vector load / store instructions only support small
   // offsets).
 
-  assert (isa<LoadInst>(I) || isa<StoreInst>(I));
   Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
                        I->getOperand(0)->getType());
   bool IsFPAccess = MemAccessTy->isFloatingPointTy();
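The helpers above encode two SystemZ facts: storage-to-storage instructions
such as MVC only have a 12-bit unsigned displacement and no index register,
while ordinary loads and stores can also use the 20-bit signed displacement
forms. A standalone sketch of those two ranges (the helpers mirror
llvm::isUInt<12> and llvm::isInt<20> and are reimplemented here only for
illustration):

    #include <cstdint>
    #include <iostream>

    static bool isUInt12(int64_t V) { return V >= 0 && V < (INT64_C(1) << 12); }
    static bool isInt20(int64_t V) {
      return V >= -(INT64_C(1) << 19) && V < (INT64_C(1) << 19);
    }

    int main() {
      // An access classified by needsD12() folds only small unsigned offsets:
      std::cout << isUInt12(4000) << ' ' << isUInt12(4096) << '\n'; // 1 0
      // Other loads/stores can also use negative or larger displacements:
      std::cout << isInt20(-4) << ' ' << isInt20(524288) << '\n';   // 1 0
    }
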
Index: lib/Target/SystemZ/SystemZTargetTransformInfo.h
===================================================================
--- lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -47,6 +47,8 @@
   void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
 
+  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                     TargetTransformInfo::LSRCost &C2);
   /// @}
 
   /// \name Vector TTI Implementations
@@ -56,6 +58,7 @@
   unsigned getRegisterBitWidth(bool Vector) const;
 
   bool prefersVectorizedAddressing() { return false; }
+  bool LSRWithInstrQueries() { return true; }
   bool supportsEfficientVectorElementLoadStore() { return true; }
   bool enableInterleavedAccessVectorization() { return true; }
 
Index: lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
===================================================================
--- lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -292,6 +292,18 @@
   UP.Force = true;
 }
 
+
+bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                                   TargetTransformInfo::LSRCost &C2) {
+  // SystemZ specific: check instruction count (first).
+  return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
+                  C1.NumIVMuls, C1.NumBaseAdds,
+                  C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
+         std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost,
+                  C2.NumIVMuls, C2.NumBaseAdds,
+                  C2.ScaleCost, C2.ImmCost, C2.SetupCost);
+}
+
 unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
   if (!Vector)
     // Discount the stack pointer. Also leave out %r0, since it can't
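The std::tie comparison above is lexicographic, so Insns dominates: a solution
that needs fewer instructions wins even if it costs more in every later field.
A minimal standalone demonstration (the struct is a reduced stand-in for
TargetTransformInfo::LSRCost):

    #include <iostream>
    #include <tuple>

    struct Cost { unsigned Insns, NumRegs, SetupCost; };

    static bool isLSRCostLess(const Cost &C1, const Cost &C2) {
      // Lexicographic: compare Insns first; later fields only break ties.
      return std::tie(C1.Insns, C1.NumRegs, C1.SetupCost) <
             std::tie(C2.Insns, C2.NumRegs, C2.SetupCost);
    }

    int main() {
      Cost A{3, 9, 1}, B{4, 2, 0}, C{3, 2, 5};
      std::cout << isLSRCostLess(A, B) << '\n'; // 1: fewer Insns wins outright
      std::cout << isLSRCostLess(C, A) << '\n'; // 1: Insns tie, fewer NumRegs
    }
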
Index: lib/Target/WebAssembly/WebAssemblyISelLowering.h
===================================================================
--- lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -55,7 +55,8 @@
   bool isCheapToSpeculateCttz() const override;
   bool isCheapToSpeculateCtlz() const override;
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
-                             unsigned AS) const override;
+                             unsigned AS,
+                             Instruction *I = nullptr) const override;
   bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, unsigned Align,
                                       bool *Fast) const override;
   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
Index: lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
===================================================================
--- lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -233,7 +233,8 @@
 bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                       const AddrMode &AM,
                                                       Type *Ty,
-                                                      unsigned AS) const {
+                                                      unsigned AS,
+                                                      Instruction *I) const {
   // WebAssembly offsets are added as unsigned without wrapping. The
   // isLegalAddressingMode gives us no way to determine if wrapping could be
   // happening, so we approximate this by accepting only non-negative offsets.
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -887,7 +887,8 @@
     /// Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
 
     /// Return true if the specified immediate is legal
     /// icmp immediate, that is the target has icmp instructions which can
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -24516,7 +24516,8 @@
 /// target, for a load/store of the specified type.
 bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                               const AddrMode &AM, Type *Ty,
-                                              unsigned AS) const {
+                                              unsigned AS,
+                                              Instruction *I) const {
   // X86 supports extremely general addressing modes.
   CodeModel::Model M = getTargetMachine().getCodeModel();
 
Index: lib/Target/XCore/XCoreISelLowering.h
===================================================================
--- lib/Target/XCore/XCoreISelLowering.h
+++ lib/Target/XCore/XCoreISelLowering.h
@@ -123,7 +123,8 @@
                       MachineBasicBlock *MBB) const override;
 
     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
 
     /// If a physical register, this returns the register that receives the
     /// exception address on entry to an EH pad.
Index: lib/Target/XCore/XCoreISelLowering.cpp
===================================================================
--- lib/Target/XCore/XCoreISelLowering.cpp
+++ lib/Target/XCore/XCoreISelLowering.cpp
@@ -1889,7 +1889,8 @@
 /// by AM is legal for this target, for a load/store of the specified type.
 bool XCoreTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                 const AddrMode &AM, Type *Ty,
-                                                unsigned AS) const {
+                                                unsigned AS,
+                                                Instruction *I) const {
   if (Ty->getTypeID() == Type::VoidTyID)
     return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
 
Index: lib/Transforms/Scalar/LoopStrengthReduce.cpp
===================================================================
--- lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1268,14 +1268,15 @@
     if (F.BaseGV)
       C.ImmCost += 64; // Handle symbolic values conservatively.
                        // TODO: This should probably be the pointer size.
-    else if (Offset != 0)
-      C.ImmCost += APInt(64, Offset, true).getMinSignedBits();
-
-    // Check with target if this offset with this instruction is
-    // specifically not supported.
-    if ((isa<LoadInst>(Fixup.UserInst) || isa<StoreInst>(Fixup.UserInst)) &&
-        !TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset))
-      C.NumBaseAdds++;
+    if (Offset == 0)
+      continue;
+
+    if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
+      if (!TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset))
+        C.NumBaseAdds++;
+    } else
+      C.ImmCost += APInt(64, Offset, true).getMinSignedBits();
   }
 
   // If we don't count instruction cost exit here.
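In the fallback branch above, a nonzero offset is still charged by its signed
bit width, which is what APInt(64, Offset, true).getMinSignedBits() computes.
A standalone equivalent (assuming the usual APInt semantics; not LLVM code):

    #include <cstdint>
    #include <iostream>

    // Smallest N such that V fits in an N-bit two's-complement integer,
    // matching APInt::getMinSignedBits() for a 64-bit value.
    static unsigned minSignedBits(int64_t V) {
      for (unsigned N = 1; N < 64; ++N)
        if (V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1)))
          return N;
      return 64;
    }

    int main() {
      std::cout << minSignedBits(0) << '\n';   // 1
      std::cout << minSignedBits(400) << '\n'; // 10 -> ImmCost += 10
      std::cout << minSignedBits(-4) << '\n';  // 3
    }
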
@@ -1528,11 +1529,12 @@
 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
                                  GlobalValue *BaseGV, int64_t BaseOffset,
-                                 bool HasBaseReg, int64_t Scale) {
+                                 bool HasBaseReg, int64_t Scale,
+                                 Instruction *Fixup = nullptr) {
   switch (Kind) {
   case LSRUse::Address:
     return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset,
-                                     HasBaseReg, Scale, AccessTy.AddrSpace);
+                                     HasBaseReg, Scale, AccessTy.AddrSpace,
+                                     Fixup);
 
   case LSRUse::ICmpZero:
     // There's not even a target hook for querying whether it would be legal to
@@ -1638,6 +1640,16 @@
 
 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                  const LSRUse &LU, const Formula &F) {
+  // Target may want to look at the user instructions.
+  if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
+    for (const LSRFixup &Fixup : LU.Fixups)
+      if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
+                                F.BaseOffset, F.HasBaseReg, F.Scale,
+                                Fixup.UserInst))
+        return false;
+    return true;
+  }
+
   return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
                               LU.AccessTy, F.BaseGV, F.BaseOffset,
                               F.HasBaseReg, F.Scale);
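With instruction queries enabled, a formula is only considered completely
folded if every fixup's user instruction accepts it; a single restricted user
(say, a memcpy that becomes an MVC) vetoes the formula for the whole use. A
standalone sketch of that all-of check (constants and names are illustrative):

    #include <iostream>
    #include <vector>

    struct Fixup { bool NeedsD12; };

    static bool legalForUser(int64_t Offset, const Fixup &F) {
      if (F.NeedsD12)
        return Offset >= 0 && Offset < 4096;             // 12-bit unsigned only
      return Offset >= -(1 << 19) && Offset < (1 << 19); // 20-bit signed
    }

    static bool isAMCompletelyFolded(int64_t Offset,
                                     const std::vector<Fixup> &Fixups) {
      // One restricted user is enough to reject the formula for all users.
      for (const Fixup &F : Fixups)
        if (!legalForUser(Offset, F))
          return false;
      return true;
    }

    int main() {
      std::vector<Fixup> Uses = {{false}, {true}};
      std::cout << isAMCompletelyFolded(-8, Uses) << '\n';  // 0: MVC-style use vetoes
      std::cout << isAMCompletelyFolded(100, Uses) << '\n'; // 1: fine for both
    }
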
Index: test/CodeGen/SystemZ/dag-combine-01.ll
===================================================================
--- test/CodeGen/SystemZ/dag-combine-01.ll
+++ test/CodeGen/SystemZ/dag-combine-01.ll
@@ -40,7 +40,7 @@
 for.body.3.i:                       ; preds = %for.body.3.i, %for.body.3.lr.ph.i
 ; CHECK-LABEL: .LBB0_5:
 ; CHECK-NOT: stfh %r{{.*}}, 0(%r{{.*}})
-; CHECK: lg %r{{.*}}, -4(%r{{.*}})
+; CHECK: lg %r{{.*}}, 8(%r{{.*}})
 ; Overlapping load should go before the store
   %indvars.iv.i = phi i64 [ 0, %for.body.3.lr.ph.i ], [ %indvars.iv.next.i, %for.body.3.i ]
   %3 = shl nsw i64 %indvars.iv.i, 6
Index: test/CodeGen/SystemZ/loop-01.ll
===================================================================
--- test/CodeGen/SystemZ/loop-01.ll
+++ test/CodeGen/SystemZ/loop-01.ll
@@ -9,7 +9,7 @@
 define void @f1(i32 *%dest, i32 %a) {
 ; CHECK-LABEL: f1:
 ; CHECK-NOT: sllg
-; CHECK: st %r3, 0({{%r[1-5],%r[1-5]}})
+; CHECK: st %r3, 400({{%r[1-5],%r[1-5]}})
 ; CHECK: br %r14
 entry:
   br label %loop
@@ -239,3 +239,84 @@
   %exitcond = icmp eq i32 %lftr.wideiv, %S
   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
 }
+
+; Test that a memcpy loop does not get a lot of LAYs before each MVC
+; (D12 addressing and no index register).
+%0 = type { %1, %2* }
+%1 = type { %2*, %2* }
+%2 = type <{ %3, i32, [4 x i8] }>
+%3 = type { i16*, i16*, i16* }
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #0
+
+define void @f8() {
+; CHECK-Z13-LABEL: f8:
+; CHECK-Z13: mvc
+; CHECK-Z13-NEXT: mvc
+; CHECK-Z13-NEXT: mvc
+; CHECK-Z13-NEXT: mvc
+
+bb:
+  %tmp = load %0*, %0** undef, align 8
+  br i1 undef, label %bb2, label %bb1
+
+bb1:                                              ; preds = %bb
+  br label %bb2
+
+bb2:                                              ; preds = %bb1, %bb
+  %tmp3 = phi %0* [ %tmp, %bb ], [ undef, %bb1 ]
+  %tmp4 = phi %0* [ undef, %bb ], [ undef, %bb1 ]
+  br label %bb5
+
+bb5:                                              ; preds = %bb5, %bb2
+  %tmp6 = phi %0* [ %tmp21, %bb5 ], [ %tmp3, %bb2 ]
+  %tmp7 = phi %0* [ %tmp20, %bb5 ], [ %tmp4, %bb2 ]
+  %tmp8 = getelementptr inbounds %0, %0* %tmp7, i64 -1
+  %tmp9 = getelementptr inbounds %0, %0* %tmp6, i64 -1
+  %tmp10 = bitcast %0* %tmp9 to i8*
+  %tmp11 = bitcast %0* %tmp8 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp10, i8* %tmp11, i64 24, i32 8, i1 false)
+  %tmp12 = getelementptr inbounds %0, %0* %tmp7, i64 -2
+  %tmp13 = getelementptr inbounds %0, %0* %tmp6, i64 -2
+  %tmp14 = bitcast %0* %tmp13 to i8*
+  %tmp15 = bitcast %0* %tmp12 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i32 8, i1 false)
+  %tmp16 = getelementptr inbounds %0, %0* %tmp7, i64 -3
+  %tmp17 = getelementptr inbounds %0, %0* %tmp6, i64 -3
+  %tmp18 = bitcast %0* %tmp17 to i8*
+  %tmp19 = bitcast %0* %tmp16 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp18, i8* %tmp19, i64 24, i32 8, i1 false)
+  %tmp20 = getelementptr inbounds %0, %0* %tmp7, i64 -4
+  %tmp21 = getelementptr inbounds %0, %0* %tmp6, i64 -4
+  %tmp22 = bitcast %0* %tmp21 to i8*
+  %tmp23 = bitcast %0* %tmp20 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp22, i8* %tmp23, i64 24, i32 8, i1 false)
+  br label %bb5
+}
+
+; Test that a CHSI does not need an AGHIK inside the loop (no index register).
+define void @f9() {
+; CHECK-Z13-LABEL: f9:
+; CHECK-Z13: # =>This Inner Loop Header: Depth=1
+; CHECK-Z13-NOT: aghik
+; CHECK-Z13: chsi
+
+entry:
+  br label %for.body.i63
+
+for.body.i63:                                     ; preds = %for.inc.i, %entry
+  %indvars.iv155.i = phi i64 [ 0, %entry ], [ %indvars.iv.next156.i.3, %for.inc.i ]
+  %arrayidx.i62 = getelementptr inbounds i32, i32* undef, i64 %indvars.iv155.i
+  %tmp = load i32, i32* %arrayidx.i62, align 4
+  %cmp9.i = icmp eq i32 %tmp, 0
+  br i1 %cmp9.i, label %for.inc.i, label %if.then10.i
+
+if.then10.i:                                      ; preds = %for.body.i63
+  unreachable
+
+for.inc.i:                                        ; preds = %for.body.i63
+  %indvars.iv.next156.i = or i64 %indvars.iv155.i, 1
+  %arrayidx.i62.1 = getelementptr inbounds i32, i32* undef, i64 %indvars.iv.next156.i
+  %tmp1 = load i32, i32* %arrayidx.i62.1, align 4
+  %indvars.iv.next156.i.3 = add nsw i64 %indvars.iv155.i, 4
+  br label %for.body.i63
+}