Index: llvm/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -690,6 +690,11 @@
   /// immediate offset and no index register.
   bool LSRWithInstrQueries() const;

+  /// Return true if the loop strength reduction pass should try to group
+  /// similar unfoldable offsets together and evaluate slightly more
+  /// formulas without unfolded offsets.
+  bool LSRUnfOffsetsReconc() const;
+
   /// Return true if it's free to truncate a value of type Ty1 to type
   /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
   /// by referencing its sub-register AX.
@@ -1466,6 +1471,7 @@
                               int64_t BaseOffset, bool HasBaseReg,
                               int64_t Scale, unsigned AddrSpace) = 0;
   virtual bool LSRWithInstrQueries() = 0;
+  virtual bool LSRUnfOffsetsReconc() = 0;
   virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
   virtual bool isProfitableToHoist(Instruction *I) = 0;
   virtual bool useAA() = 0;
@@ -1838,6 +1844,7 @@
                           AddrSpace);
   }
   bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
+  bool LSRUnfOffsetsReconc() override { return Impl.LSRUnfOffsetsReconc(); }
   bool isTruncateFree(Type *Ty1, Type *Ty2) override {
     return Impl.isTruncateFree(Ty1, Ty2);
   }
Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -271,6 +271,8 @@
   bool LSRWithInstrQueries() const { return false; }

+  bool LSRUnfOffsetsReconc() const { return false; }
+
   bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }

   bool isProfitableToHoist(Instruction *I) const { return true; }
Index: llvm/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -431,6 +431,10 @@
   return TTIImpl->LSRWithInstrQueries();
 }

+bool TargetTransformInfo::LSRUnfOffsetsReconc() const {
+  return TTIImpl->LSRUnfOffsetsReconc();
+}
+
 bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
   return TTIImpl->isTruncateFree(Ty1, Ty2);
 }
Index: llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -75,6 +75,7 @@
   bool hasDivRemOp(Type *DataType, bool IsSigned);
   bool prefersVectorizedAddressing() { return false; }
   bool LSRWithInstrQueries() { return true; }
+  bool LSRUnfOffsetsReconc() { return true; }
   bool supportsEfficientVectorElementLoadStore() { return true; }
   bool enableInterleavedAccessVectorization() { return true; }
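A quick illustration of the TTI plumbing this patch threads through four files: the hook defaults to false in the implementation base class, is forwarded through the Concept/Model pair in TargetTransformInfo.h, and SystemZ overrides it to true. The sketch below mirrors that shape outside of LLVM; all class names in it are simplified stand-ins, not the real LLVM types.

// Standalone sketch of the hook plumbing above: a conservative default
// (false) in the implementation base class, an override (true) in the
// target implementation, and virtual dispatch through a Concept/Model
// pair. All class names are simplified stand-ins for the LLVM types.
#include <iostream>
#include <memory>

struct TTIConcept {                   // stands in for TargetTransformInfo::Concept
  virtual ~TTIConcept() = default;
  virtual bool LSRUnfOffsetsReconc() = 0;
};

struct BaseTTIImpl {                  // stands in for TargetTransformInfoImplBase
  bool LSRUnfOffsetsReconc() const { return false; }  // default: disabled
};

struct SystemZTTIImpl : BaseTTIImpl { // target opts in by shadowing the default
  bool LSRUnfOffsetsReconc() const { return true; }
};

template <typename T> struct TTIModel final : TTIConcept { // stands in for ::Model
  T Impl;
  explicit TTIModel(T I) : Impl(I) {}
  bool LSRUnfOffsetsReconc() override { return Impl.LSRUnfOffsetsReconc(); }
};

int main() {
  std::unique_ptr<TTIConcept> Generic(new TTIModel<BaseTTIImpl>(BaseTTIImpl()));
  std::unique_ptr<TTIConcept> SystemZ(new TTIModel<SystemZTTIImpl>(SystemZTTIImpl()));
  std::cout << Generic->LSRUnfOffsetsReconc() << '\n'; // 0
  std::cout << SystemZ->LSRUnfOffsetsReconc() << '\n'; // 1
}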
Index: llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1973,9 +1973,13 @@
   // Support for sharing of LSRUses between LSRFixups.
   using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
   UseMapTy UseMap;
+  struct UseUnfOffsMapTy : std::multimap<const SCEV *, size_t> {};
+  UseUnfOffsMapTy UseUnfOffsMap;

   bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
                           LSRUse::KindType Kind, MemAccessTy AccessTy);
+  bool reconcileUnfoldedAddressOffsets(LSRUse &LU, const SCEV *Expr,
+                                       int64_t &Offset, MemAccessTy AccessTy);

   std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
                                     MemAccessTy AccessTy);
@@ -2562,6 +2566,30 @@
   return true;
 }

+bool LSRInstance::reconcileUnfoldedAddressOffsets(LSRUse &LU, const SCEV *Expr,
+                                                  int64_t &Offset,
+                                                  MemAccessTy AccessTy) {
+  assert(!LU.Formulae.empty() && "Expected at least one formula.");
+  assert(!LU.Fixups.empty() && "Expected at least one fixup.");
+
+  for (const Formula &F : LU.Formulae) {
+    for (const SCEV *BaseReg : F.BaseRegs) {
+      const SCEV *Copy = BaseReg;
+      int64_t RegOffs = ExtractImmediate(Copy, SE);
+      // BaseReg is Expr plus a nonzero immediate RegOffs: try to absorb the
+      // difference between the two immediates into this use's fixup offset.
+      if (RegOffs != 0 && Copy == Expr) {
+        int64_t NewOffset = Offset - RegOffs;
+        if (reconcileNewOffset(LU, NewOffset, /*HasBaseReg=*/true,
+                               LSRUse::Address, AccessTy)) {
+          Offset = NewOffset;
+          return true;
+        }
+      }
+    }
+  }
+
+  return false;
+}
+
 /// Return an LSRUse index and an offset value for a fixup which needs the
 /// given expression, with the given kind and optional access type. Either
 /// reuse an existing use or create a new one, as needed.
@@ -2572,8 +2600,25 @@
   int64_t Offset = ExtractImmediate(Expr, SE);

   // Basic uses can't accept any offset, for example.
+  UseUnfOffsMapTy::iterator ItrUnfolded = UseUnfOffsMap.end();
   if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
                         Offset, /*HasBaseReg=*/ true)) {
+    if (Kind == LSRUse::Address && TTI.LSRUnfOffsetsReconc()) {
+      // Try to find a usable existing LSRUse with an unfoldable offset
+      // that is reconcilable with Offset.
+      auto R = UseUnfOffsMap.equal_range(Expr);
+      for (auto I = R.first; I != R.second; ++I) {
+        size_t LUIdx = I->second;
+        LSRUse &LU = Uses[LUIdx];
+        if (reconcileUnfoldedAddressOffsets(LU, Expr, Offset, AccessTy))
+          return std::make_pair(LUIdx, Offset);
+      }
+      // Remember that the bare Expr has an unfoldable offset and record
+      // the new LUIdx for it below.
+      ItrUnfolded = UseUnfOffsMap.insert(std::make_pair(Expr, SIZE_MAX));
+    }
     Expr = Copy;
     Offset = 0;
   }
@@ -2581,6 +2626,8 @@
   std::pair<UseMapTy::iterator, bool> P =
       UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
   if (!P.second) {
+    assert(ItrUnfolded == UseUnfOffsMap.end() &&
+           "Offsets with an identical Expr should have been reconciled.");
     // A use already existed with this base.
     size_t LUIdx = P.first->second;
     LSRUse &LU = Uses[LUIdx];
@@ -2594,6 +2641,8 @@
   P.first->second = LUIdx;
   Uses.push_back(LSRUse(Kind, AccessTy));
   LSRUse &LU = Uses[LUIdx];
+  if (ItrUnfolded != UseUnfOffsMap.end())
+    ItrUnfolded->second = LUIdx;

   LU.MinOffset = Offset;
   LU.MaxOffset = Offset;
@@ -4660,6 +4709,10 @@
       Formula &Best = LU.Formulae[P.first->second];
       if (IsBetterThan(F, Best))
         std::swap(F, Best);
+      if (TTI.LSRUnfOffsetsReconc() && LU.Kind == LSRUse::Address &&
+          Best.UnfoldedOffset && !F.UnfoldedOffset)
+        continue; // Also keep evaluating the formula without the unfolded offset.
+
       LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
                  dbgs() << "\n"
                            "    in favor of formula ";
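The new reconciliation path in getUse() can be hard to follow through the SCEV bookkeeping, so here is a minimal standalone sketch of the underlying idea with plain integers in place of SCEV expressions. The foldability test and the signed 20-bit displacement range are assumptions modeled on SystemZ long displacements; in the pass itself, isAlwaysFoldable() makes that decision.

// Standalone sketch of the reconciliation idea, with plain integers in
// place of SCEVs. An offset too large to fold into an addressing mode is
// grouped with an existing unfoldable offset whenever the delta between
// the two would fold; each group then needs only one hoisted base
// register. isFoldable() and the 20-bit signed range are assumptions
// modeled on SystemZ, standing in for isAlwaysFoldable().
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

constexpr int64_t DispMin = -(1 << 19);     // -524288
constexpr int64_t DispMax = (1 << 19) - 1;  //  524287

bool isFoldable(int64_t Off) { return Off >= DispMin && Off <= DispMax; }

int main() {
  // First member of each pair: the group's representative offset (hoisted
  // into a preheader register). Second: the foldable deltas of the members.
  std::vector<std::pair<int64_t, std::vector<int64_t>>> Groups;

  // Byte offsets taken from the f10 test below (element index * 8).
  const int64_t Offsets[] = {-1599952, -1599712, 1600280, 1616088};
  for (int64_t Off : Offsets) {
    if (isFoldable(Off))
      continue;                         // folds directly; no grouping needed
    bool Placed = false;
    for (auto &G : Groups)
      if (isFoldable(Off - G.first)) {  // reconcilable with this group
        G.second.push_back(Off - G.first);
        Placed = true;
        break;
      }
    if (!Placed)
      Groups.push_back({Off, {0}});     // start a new group
  }
  std::cout << Groups.size() << " hoisted base registers needed\n"; // 2
}

With these four sample offsets, two groups result, which is why the f10 test below expects exactly two hoisted agfi instructions in the preheader.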
Index: llvm/test/CodeGen/SystemZ/loop-01.ll
===================================================================
--- llvm/test/CodeGen/SystemZ/loop-01.ll
+++ llvm/test/CodeGen/SystemZ/loop-01.ll
@@ -320,3 +320,43 @@
   %indvars.iv.next156.i.3 = add nsw i64 %indvars.iv155.i, 4
   br label %for.body.i63
 }
+
+; Test that there are two agfi instructions in the preheader but none in the
+; loop body.
+define void @f10(double* %arg, double %V) {
+; CHECK-Z13-LABEL: f10:
+; CHECK-Z13-LABEL: # %bb.0:
+; CHECK-Z13: lgr %r1, %r2
+; CHECK-Z13: lgr %r3, %r2
+; CHECK-Z13: agfi %r1, -1599952
+; CHECK-Z13: agfi %r3, 1600280
+; CHECK-Z13: lghi %r4, 0
+; CHECK-Z13-LABEL: .LBB9_1:
+; CHECK-Z13-NOT: agfi
+; CHECK-Z13: j .LBB9_1
+; CHECK-Z13-LABEL: .Lfunc_end9:
+
+bb:
+  br label %bb1
+
+bb1:
+  %i = phi i64 [ 0, %bb ], [ %i13, %bb1 ]
+  %i2 = getelementptr inbounds double, double* %arg, i64 %i
+  store volatile double %V, double* %i2, align 8
+  %i3 = add nsw i64 %i, -199994
+  %i4 = getelementptr inbounds double, double* %arg, i64 %i3
+  store volatile double %V, double* %i4, align 8
+  %i5 = add nuw nsw i64 %i, 202011
+  %i6 = getelementptr inbounds double, double* %arg, i64 %i5
+  store volatile double %V, double* %i6, align 8
+  %i7 = add nuw nsw i64 %i, 198013
+  %i8 = getelementptr inbounds double, double* %arg, i64 %i7
+  store volatile double %V, double* %i8, align 8
+  %i9 = add nuw nsw i64 %i, 200035
+  %i10 = getelementptr inbounds double, double* %arg, i64 %i9
+  store volatile double %V, double* %i10, align 8
+  %i11 = add nsw i64 %i, -199964
+  %i12 = getelementptr inbounds double, double* %arg, i64 %i11
+  store volatile double %V, double* %i12, align 8
+  %i13 = add nuw nsw i64 %i, 20
+  br label %bb1
+}
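For reviewers checking the immediates: the sketch below verifies that the two agfi constants are the scaled byte offsets of two of the stores, and that every store is then addressable from the unmodified pointer or one of the two adjusted bases with an in-range displacement. Which base serves which access is inferred from the constants, not enforced by the CHECK lines, so treat the pairing as an assumption.

// Arithmetic cross-check of the constants in the f10 CHECK lines. Element
// indices scale by sizeof(double) == 8; the two agfi immediates are the
// hoisted byte offsets, and every access is then reachable from %r2, %r1,
// or %r3 with a displacement in SystemZ's signed 20-bit range.
#include <cassert>
#include <cstdint>

int main() {
  const int64_t Idx[6] = {0, -199994, 202011, 198013, 200035, -199964};
  const int64_t Base1 = -1599952; // agfi %r1, -1599952 == -199994 * 8
  const int64_t Base2 = 1600280;  // agfi %r3,  1600280 ==  200035 * 8
  assert(Base1 == Idx[1] * 8 && Base2 == Idx[4] * 8);

  auto Fits = [](int64_t D) { return D >= -524288 && D <= 524287; };
  for (int64_t I : Idx) {
    int64_t Byte = I * 8;
    // Reachable from the unmodified pointer or one of the two hoisted bases.
    assert(Fits(Byte) || Fits(Byte - Base1) || Fits(Byte - Base2));
  }
  return 0;
}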