Index: lib/Transforms/Scalar/LoopStrengthReduce.cpp =================================================================== --- lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -257,8 +257,8 @@ int64_t UnfoldedOffset; Formula() - : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0), - ScaledReg(nullptr), UnfoldedOffset(0) {} + : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0), + ScaledReg(nullptr), UnfoldedOffset(0) {} void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); @@ -683,11 +683,14 @@ } /// getAccessType - Return the type of the memory being accessed. -static Type *getAccessType(const Instruction *Inst) { +static Type *getAccessType(const Instruction *Inst, unsigned &AddrSpace) { Type *AccessTy = Inst->getType(); - if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) + if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { AccessTy = SI->getOperand(0)->getType(); - else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { + AddrSpace = SI->getPointerAddressSpace(); + } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + AddrSpace = LI->getPointerAddressSpace(); + } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { // Addressing modes can also be folded into prefetches and a variety // of intrinsics. switch (II->getIntrinsicID()) { @@ -707,6 +710,7 @@ AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1), PTy->getAddressSpace()); + return AccessTy; } @@ -1205,6 +1209,7 @@ KindType Kind; Type *AccessTy; + unsigned AddrSpace; SmallVector<int64_t, 8> Offsets; int64_t MinOffset; @@ -1236,12 +1241,13 @@ /// Regs - The set of register candidates used by all formulae in this LSRUse. SmallPtrSet<const SCEV *, 4> Regs; - LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T), - MinOffset(INT64_MAX), - MaxOffset(INT64_MIN), - AllFixupsOutsideLoop(true), - RigidFormula(false), - WidestFixupType(nullptr) {} + LSRUse(KindType K, Type *T, unsigned AS) + : Kind(K), AccessTy(T), AddrSpace(AS), + MinOffset(INT64_MAX), + MaxOffset(INT64_MIN), + AllFixupsOutsideLoop(true), + RigidFormula(false), + WidestFixupType(nullptr) {} bool HasFormulaWithSameRegs(const Formula &F) const; bool InsertFormula(const Formula &F); @@ -1361,11 +1367,13 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, LSRUse::KindType Kind, Type *AccessTy, - GlobalValue *BaseGV, int64_t BaseOffset, + unsigned AddrSpace, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, int64_t Scale) { switch (Kind) { case LSRUse::Address: - return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); + return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, + HasBaseReg, Scale, AddrSpace); case LSRUse::ICmpZero: // There's not even a target hook for querying whether it would be legal to @@ -1413,8 +1421,9 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, int64_t MinOffset, int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, - GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale) { + unsigned AddrSpace, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) { // Check for overflow.
if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) != (MinOffset > 0)) @@ -1425,16 +1434,16 @@ return false; MaxOffset = (uint64_t)BaseOffset + MaxOffset; - return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset, + return isAMCompletelyFolded(TTI, Kind, AccessTy, AddrSpace, BaseGV, MinOffset, HasBaseReg, Scale) && - isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset, - HasBaseReg, Scale); + isAMCompletelyFolded(TTI, Kind, AccessTy, AddrSpace, BaseGV, MaxOffset, + HasBaseReg, Scale); } static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, int64_t MinOffset, int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, - const Formula &F) { + unsigned AddrSpace, const Formula &F) { // For the purpose of isAMCompletelyFolded either having a canonical formula // or a scale not equal to zero is correct. // Problems may arise from non canonical formulae having a scale == 0. @@ -1444,36 +1453,39 @@ // compile time sake. assert((F.isCanonical() || F.Scale != 0)); return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, - F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale); + AddrSpace, F.BaseGV, F.BaseOffset, F.HasBaseReg, + F.Scale); } /// isLegalUse - Test whether we know how to expand the current formula. static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, - int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, - GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) { + int64_t MaxOffset, LSRUse::KindType Kind, + Type *AccessTy, unsigned AddrSpace, + GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale) { // We know how to expand completely foldable formulae. - return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, - BaseOffset, HasBaseReg, Scale) || + return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, + AccessTy, AddrSpace, + BaseGV, BaseOffset, HasBaseReg, Scale) || // Or formulae that use a base register produced by a sum of base // registers. (Scale == 1 && isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, - BaseGV, BaseOffset, true, 0)); + AddrSpace, BaseGV, BaseOffset, true, 0)); } static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, - const Formula &F) { - return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV, - F.BaseOffset, F.HasBaseReg, F.Scale); + unsigned AddrSpace, const Formula &F) { + return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, AddrSpace, + F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale); } static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F) { return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, - LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg, - F.Scale); + LU.AccessTy, LU.AddrSpace, F.BaseGV, F.BaseOffset, + F.HasBaseReg, F.Scale); } static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, @@ -1484,7 +1496,7 @@ // If the use is not completely folded in that instruction, we will have to // pay an extra cost only for scale != 1. 
if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, - LU.AccessTy, F)) + LU.AccessTy, LU.AddrSpace, F)) return F.Scale != 1; switch (LU.Kind) { @@ -1493,11 +1505,11 @@ int ScaleCostMinOffset = TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV, F.BaseOffset + LU.MinOffset, - F.HasBaseReg, F.Scale); + F.HasBaseReg, F.Scale, LU.AddrSpace); int ScaleCostMaxOffset = TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV, F.BaseOffset + LU.MaxOffset, - F.HasBaseReg, F.Scale); + F.HasBaseReg, F.Scale, LU.AddrSpace); assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 && "Legal addressing mode has an illegal cost!"); @@ -1515,7 +1527,8 @@ } static bool isAlwaysFoldable(const TargetTransformInfo &TTI, - LSRUse::KindType Kind, Type *AccessTy, + LSRUse::KindType Kind, + Type *AccessTy, unsigned AddrSpace, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg) { // Fast-path: zero is always foldable. @@ -1532,14 +1545,15 @@ HasBaseReg = true; } - return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset, - HasBaseReg, Scale); + return isAMCompletelyFolded(TTI, Kind, AccessTy, AddrSpace, BaseGV, + BaseOffset, HasBaseReg, Scale); } static bool isAlwaysFoldable(const TargetTransformInfo &TTI, ScalarEvolution &SE, int64_t MinOffset, int64_t MaxOffset, LSRUse::KindType Kind, - Type *AccessTy, const SCEV *S, bool HasBaseReg) { + Type *AccessTy, const SCEV *S, + bool HasBaseReg, unsigned AddrSpace) { // Fast-path: zero is always foldable. if (S->isZero()) return true; @@ -1558,8 +1572,8 @@ // base and a scale. int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1; - return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, - BaseOffset, HasBaseReg, Scale); + return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, + AddrSpace, BaseGV, BaseOffset, HasBaseReg, Scale); } namespace { @@ -1696,11 +1710,13 @@ UseMapTy UseMap; bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, - LSRUse::KindType Kind, Type *AccessTy); + LSRUse::KindType Kind, Type *AccessTy, + unsigned AddrSpace); std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind, - Type *AccessTy); + Type *AccessTy, + unsigned AddrSpace); void DeleteUse(LSRUse &LU, size_t LUIdx); @@ -2152,16 +2168,19 @@ C->getValue().isMinSignedValue()) goto decline_post_inc; // Check for possible scaled-address reuse. - Type *AccessTy = getAccessType(UI->getUser()); + unsigned AddrSpace = ~0u; + Type *AccessTy = getAccessType(UI->getUser(), AddrSpace); int64_t Scale = C->getSExtValue(); if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr, /*BaseOffset=*/ 0, - /*HasBaseReg=*/ false, Scale)) + /*HasBaseReg=*/ false, Scale, + AddrSpace)) goto decline_post_inc; Scale = -Scale; if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr, /*BaseOffset=*/ 0, - /*HasBaseReg=*/ false, Scale)) + /*HasBaseReg=*/ false, Scale, + AddrSpace)) goto decline_post_inc; } } @@ -2218,10 +2237,12 @@ /// return true. bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, - LSRUse::KindType Kind, Type *AccessTy) { + LSRUse::KindType Kind, Type *AccessTy, + unsigned AddrSpace) { int64_t NewMinOffset = LU.MinOffset; int64_t NewMaxOffset = LU.MaxOffset; Type *NewAccessTy = AccessTy; + unsigned NewAddrSpace = AddrSpace; // Check for a mismatched kind. It's tempting to collapse mismatched kinds to // something conservative, however this can pessimize in the case that one of // the uses will have all its uses outside the loop, for example. if (LU.Kind != Kind) return false; @@ -2232,18 +2253,27 @@ // Check for a mismatched access type, and fall back conservatively as needed.
// TODO: Be less conservative when the type is similar and can use the same // addressing modes. - if (Kind == LSRUse::Address && AccessTy != LU.AccessTy) - NewAccessTy = Type::getVoidTy(AccessTy->getContext()); + if (Kind == LSRUse::Address) { + if (AccessTy != LU.AccessTy) + NewAccessTy = Type::getVoidTy(AccessTy->getContext()); + + if (AddrSpace != LU.AddrSpace) + NewAddrSpace = ~0u; + } + + // Conservatively assume HasBaseReg is true for now. if (NewOffset < LU.MinOffset) { - if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr, + if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, NewAddrSpace, + /*BaseGV=*/nullptr, LU.MaxOffset - NewOffset, HasBaseReg)) return false; NewMinOffset = NewOffset; } else if (NewOffset > LU.MaxOffset) { - if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr, - NewOffset - LU.MinOffset, HasBaseReg)) + if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, NewAddrSpace, + /*BaseGV=*/nullptr, NewOffset - LU.MinOffset, + HasBaseReg)) return false; NewMaxOffset = NewOffset; } @@ -2252,6 +2282,7 @@ LU.MinOffset = NewMinOffset; LU.MaxOffset = NewMaxOffset; LU.AccessTy = NewAccessTy; + LU.AddrSpace = NewAddrSpace; if (NewOffset != LU.Offsets.back()) LU.Offsets.push_back(NewOffset); return true; @@ -2262,12 +2293,12 @@ /// Either reuse an existing use or create a new one, as needed. std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr, - LSRUse::KindType Kind, Type *AccessTy) { + LSRUse::KindType Kind, Type *AccessTy, unsigned AddrSpace) { const SCEV *Copy = Expr; int64_t Offset = ExtractImmediate(Expr, SE); // Basic uses can't accept any offset, for example. - if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr, + if (!isAlwaysFoldable(TTI, Kind, AccessTy, AddrSpace, /*BaseGV=*/ nullptr, Offset, /*HasBaseReg=*/ true)) { Expr = Copy; Offset = 0; @@ -2279,7 +2310,8 @@ // A use already existed with this base. size_t LUIdx = P.first->second; LSRUse &LU = Uses[LUIdx]; - if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy)) + if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, + AccessTy, AddrSpace)) // Reuse this use. return std::make_pair(LUIdx, Offset); } @@ -2287,7 +2319,7 @@ // Create a new use. size_t LUIdx = Uses.size(); P.first->second = LUIdx; - Uses.push_back(LSRUse(Kind, AccessTy)); + Uses.push_back(LSRUse(Kind, AccessTy, AddrSpace)); LSRUse &LU = Uses[LUIdx]; // We don't need to track redundant offsets, but we don't need to go out @@ -2831,9 +2863,11 @@ if (IncConst->getValue()->getValue().getMinSignedBits() > 64) return false; + unsigned AddrSpace = ~0u; + Type *AccessTy = getAccessType(UserInst, AddrSpace); int64_t IncOffset = IncConst->getValue()->getSExtValue(); if (!isAlwaysFoldable(TTI, LSRUse::Address, - getAccessType(UserInst), /*BaseGV=*/ nullptr, + AccessTy, AddrSpace, /*BaseGV=*/ nullptr, IncOffset, /*HaseBaseReg=*/ false)) return false; @@ -2962,9 +2996,10 @@ LSRUse::KindType Kind = LSRUse::Basic; Type *AccessTy = nullptr; + unsigned AddrSpace = ~0u; if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) { Kind = LSRUse::Address; - AccessTy = getAccessType(LF.UserInst); + AccessTy = getAccessType(LF.UserInst, AddrSpace); } const SCEV *S = IU.getExpr(U); @@ -3007,7 +3042,7 @@ } // Set up the initial formula for this use. - std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy); + std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy, AddrSpace); LF.LUIdx = P.first; LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; @@ -3067,7 +3102,8 @@ /// the list, and return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) { // Do not insert formula that we will not be able to expand. - assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) && + assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, LU.AddrSpace, F) && "Formula is illegal"); if (!LU.InsertFormula(F)) return false; @@ -3148,7 +3184,8 @@ LSRFixup &LF = getNewFixup(); LF.UserInst = const_cast<Instruction *>(UserInst); LF.OperandValToReplace = U; - std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, nullptr); + std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, + nullptr, ~0u); LF.LUIdx = P.first; LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; @@ -3252,7 +3289,7 @@ // Don't pull a constant into a register if the constant could be folded // into an immediate field. if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, - LU.AccessTy, *J, Base.getNumRegs() > 1)) + LU.AccessTy, *J, Base.getNumRegs() > 1, LU.AddrSpace)) continue; // Collect all operands except *J. @@ -3265,7 +3302,8 @@ // be folded into an immediate field. if (InnerAddOps.size() == 1 && isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, - LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1)) + LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1, + LU.AddrSpace)) continue; const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); @@ -3370,7 +3408,8 @@ return; Formula F = Base; F.BaseGV = GV; - if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, LU.AddrSpace, F)) return; if (IsScaledReg) F.ScaledReg = G; @@ -3401,7 +3440,7 @@ Formula F = Base; F.BaseOffset = (uint64_t)Base.BaseOffset - Offset; if (isLegalUse(TTI, LU.MinOffset - Offset, LU.MaxOffset - Offset, LU.Kind, - LU.AccessTy, F)) { + LU.AccessTy, LU.AddrSpace, F)) { // Add the offset to the base register. const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), Offset), G); // If it cancelled out, drop the base register, otherwise update it. @@ -3426,7 +3465,8 @@ return; Formula F = Base; F.BaseOffset = (uint64_t)F.BaseOffset + Imm; - if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, LU.AddrSpace, F)) return; if (IsScaledReg) F.ScaledReg = G; @@ -3497,7 +3537,7 @@ F.BaseOffset = NewBaseOffset; // Check that this scale is legal. - if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F)) + if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, LU.AddrSpace, F)) continue; // Compensate for the use having MinOffset built into it. @@ -3558,12 +3598,12 @@ Base.HasBaseReg = Base.BaseRegs.size() > 1; // Check whether this scale is going to be legal. if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, - Base)) { + LU.AddrSpace, Base)) { // As a special-case, handle special out-of-loop Basic users specially. // TODO: Reconsider this special case.
if (LU.Kind == LSRUse::Basic && isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special, - LU.AccessTy, Base) && + LU.AccessTy, LU.AddrSpace, Base) && LU.AllFixupsOutsideLoop) LU.Kind = LSRUse::Special; else @@ -3762,7 +3802,7 @@ Formula NewF = F; NewF.BaseOffset = Offset; if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, - NewF)) + LU.AddrSpace, NewF)) continue; NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg); @@ -3788,7 +3828,7 @@ Formula NewF = F; NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm; if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, NewF)) { + LU.Kind, LU.AccessTy, LU.AddrSpace, NewF)) { if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm)) continue; NewF = F; @@ -4066,7 +4106,7 @@ continue; if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false, - LU.Kind, LU.AccessTy)) + LU.Kind, LU.AccessTy, LU.AddrSpace)) continue; DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n'); @@ -4097,7 +4137,8 @@ for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { Formula &F = LUThatHas->Formulae[i]; if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset, - LUThatHas->Kind, LUThatHas->AccessTy, F)) { + LUThatHas->Kind, LUThatHas->AccessTy, + LUThatHas->AddrSpace, F)) { DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n'); LUThatHas->DeleteFormula(F); @@ -4874,7 +4915,7 @@ for (const LSRUse &LU : Uses) { for (const Formula &F : LU.Formulae) assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, - F) && "Illegal formula generated!"); + LU.AddrSpace, F) && "Illegal formula generated!"); }; #endif Index: test/Transforms/LoopStrengthReduce/R600/different-addrspace-addressing-mode-loops.ll =================================================================== --- /dev/null +++ test/Transforms/LoopStrengthReduce/R600/different-addrspace-addressing-mode-loops.ll @@ -0,0 +1,156 @@ +; RUN: opt -S -mtriple=amdgcn-- -mcpu=bonaire -loop-reduce < %s | FileCheck -check-prefix=OPT %s + +; Test that loops with different maximum offsets for different address +; spaces are correctly handled. 
+ +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" + +; OPT-LABEL: @test_global_addressing_loop_uniform_index_max_offset_i32( +; OPT: {{^}}.lr.ph: +; OPT: %lsr.iv2 = phi i8 addrspace(1)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ] +; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv2, i64 4095 +; OPT: load i8, i8 addrspace(1)* %scevgep4, align 1 +define void @test_global_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 { +bb: + %tmp = icmp sgt i32 %n, 0 + br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge + +.lr.ph.preheader: ; preds = %bb + br label %.lr.ph + +._crit_edge.loopexit: ; preds = %.lr.ph + br label %._crit_edge + +._crit_edge: ; preds = %._crit_edge.loopexit, %bb + ret void + +.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ] + %tmp1 = add nuw nsw i64 %indvars.iv, 4095 + %tmp2 = getelementptr inbounds i8, i8 addrspace(1)* %arg1, i64 %tmp1 + %tmp3 = load i8, i8 addrspace(1)* %tmp2, align 1 + %tmp4 = sext i8 %tmp3 to i32 + %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv + %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4 + %tmp7 = add nsw i32 %tmp6, %tmp4 + store i32 %tmp7, i32 addrspace(1)* %tmp5, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph +} + +; OPT-LABEL: @test_global_addressing_loop_uniform_index_max_offset_p1_i32( +; OPT: {{^}}.lr.ph.preheader: +; OPT: %scevgep2 = getelementptr i8, i8 addrspace(1)* %arg1, i64 4096 +; OPT: br label %.lr.ph + +; OPT: {{^}}.lr.ph: +; OPT: %lsr.iv3 = phi i8 addrspace(1)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ] +; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv3, i64 1 +define void @test_global_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 { +bb: + %tmp = icmp sgt i32 %n, 0 + br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge + +.lr.ph.preheader: ; preds = %bb + br label %.lr.ph + +._crit_edge.loopexit: ; preds = %.lr.ph + br label %._crit_edge + +._crit_edge: ; preds = %._crit_edge.loopexit, %bb + ret void + +.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ] + %tmp1 = add nuw nsw i64 %indvars.iv, 4096 + %tmp2 = getelementptr inbounds i8, i8 addrspace(1)* %arg1, i64 %tmp1 + %tmp3 = load i8, i8 addrspace(1)* %tmp2, align 1 + %tmp4 = sext i8 %tmp3 to i32 + %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv + %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4 + %tmp7 = add nsw i32 %tmp6, %tmp4 + store i32 %tmp7, i32 addrspace(1)* %tmp5, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph +} + +; OPT-LABEL: @test_local_addressing_loop_uniform_index_max_offset_i32( +; OPT: {{^}}.lr.ph +; OPT: %lsr.iv2 = phi i8 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ] +; OPT: %scevgep4 = getelementptr i8, i8 
addrspace(3)* %lsr.iv2, i32 65535 +; OPT: %tmp4 = load i8, i8 addrspace(3)* %scevgep4, align 1 +define void @test_local_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 { +bb: + %tmp = icmp sgt i32 %n, 0 + br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge + +.lr.ph.preheader: ; preds = %bb + br label %.lr.ph + +._crit_edge.loopexit: ; preds = %.lr.ph + br label %._crit_edge + +._crit_edge: ; preds = %._crit_edge.loopexit, %bb + ret void + +.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ] + %tmp1 = add nuw nsw i64 %indvars.iv, 65535 + %tmp2 = trunc i64 %tmp1 to i32 + %tmp3 = getelementptr inbounds i8, i8 addrspace(3)* %arg1, i32 %tmp2 + %tmp4 = load i8, i8 addrspace(3)* %tmp3, align 1 + %tmp5 = sext i8 %tmp4 to i32 + %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv + %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4 + %tmp8 = add nsw i32 %tmp7, %tmp5 + store i32 %tmp8, i32 addrspace(1)* %tmp6, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph +} + +; OPT-LABEL: @test_local_addressing_loop_uniform_index_max_offset_p1_i32( +; OPT: {{^}}.lr.ph.preheader: +; OPT: %scevgep2 = getelementptr i8, i8 addrspace(3)* %arg1, i32 65536 +; OPT: br label %.lr.ph + +; OPT: {{^}}.lr.ph: +; OPT: %lsr.iv3 = phi i8 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ] +; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv3, i32 1 +define void @test_local_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 { +bb: + %tmp = icmp sgt i32 %n, 0 + br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge + +.lr.ph.preheader: ; preds = %bb + br label %.lr.ph + +._crit_edge.loopexit: ; preds = %.lr.ph + br label %._crit_edge + +._crit_edge: ; preds = %._crit_edge.loopexit, %bb + ret void + +.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ] + %tmp1 = add nuw nsw i64 %indvars.iv, 65536 + %tmp2 = trunc i64 %tmp1 to i32 + %tmp3 = getelementptr inbounds i8, i8 addrspace(3)* %arg1, i32 %tmp2 + %tmp4 = load i8, i8 addrspace(3)* %tmp3, align 1 + %tmp5 = sext i8 %tmp4 to i32 + %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv + %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4 + %tmp8 = add nsw i32 %tmp7, %tmp5 + store i32 %tmp8, i32 addrspace(1)* %tmp6, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hawaii" "unsafe-fp-math"="false" "use-soft-float"="false" } Index: test/Transforms/LoopStrengthReduce/R600/lit.local.cfg =================================================================== --- /dev/null +++ test/Transforms/LoopStrengthReduce/R600/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'R600' in config.root.targets: + config.unsupported = True + Index: 
test/Transforms/LoopStrengthReduce/R600/lsr-postinc-pos-addrspace.ll =================================================================== --- /dev/null +++ test/Transforms/LoopStrengthReduce/R600/lsr-postinc-pos-addrspace.ll @@ -0,0 +1,113 @@ +; RUN: llc -march=amdgcn -mcpu=bonaire -print-lsr-output < %s 2>&1 | FileCheck %s + +; Test various conditions where OptimizeLoopTermCond doesn't look at a +; memory instruction use and fails to find the address space. + +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" + +; CHECK-LABEL: @local_cmp_user( +; CHECK: bb11: +; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ -2, %entry ] +; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ undef, %entry ] + +; CHECK: bb: +; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1 +; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2 +; CHECK: %scevgep = getelementptr i8, i8 addrspace(3)* %t, i32 %lsr.iv.next2 +; CHECK: %c1 = icmp ult i8 addrspace(3)* %scevgep, undef +define void @local_cmp_user() nounwind { +entry: + br label %bb11 + +bb11: + %i = phi i32 [ 0, %entry ], [ %i.next, %bb ] + %ii = shl i32 %i, 1 + %c0 = icmp eq i32 %i, undef + br i1 %c0, label %bb13, label %bb + +bb: + %t = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* undef + %p = getelementptr i8, i8 addrspace(3)* %t, i32 %ii + %c1 = icmp ult i8 addrspace(3)* %p, undef + %i.next = add i32 %i, 1 + br i1 %c1, label %bb11, label %bb13 + +bb13: + unreachable +} + +; CHECK-LABEL: @global_cmp_user( +; CHECK: %lsr.iv.next = add i64 %lsr.iv, -1 +; CHECK: %lsr.iv.next2 = add i64 %lsr.iv1, 2 +; CHECK: %scevgep = getelementptr i8, i8 addrspace(1)* %t, i64 %lsr.iv.next2 +define void @global_cmp_user() nounwind { +entry: + br label %bb11 + +bb11: + %i = phi i64 [ 0, %entry ], [ %i.next, %bb ] + %ii = shl i64 %i, 1 + %c0 = icmp eq i64 %i, undef + br i1 %c0, label %bb13, label %bb + +bb: + %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef + %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii + %c1 = icmp ult i8 addrspace(1)* %p, undef + %i.next = add i64 %i, 1 + br i1 %c1, label %bb11, label %bb13 + +bb13: + unreachable +} + +; CHECK-LABEL: @global_gep_user( +; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i32 %lsr.iv1 +; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1 +; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2 +define void @global_gep_user() nounwind { +entry: + br label %bb11 + +bb11: + %i = phi i32 [ 0, %entry ], [ %i.next, %bb ] + %ii = shl i32 %i, 1 + %c0 = icmp eq i32 %i, undef + br i1 %c0, label %bb13, label %bb + +bb: + %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef + %p = getelementptr i8, i8 addrspace(1)* %t, i32 %ii + %c1 = icmp ult i8 addrspace(1)* %p, undef + %i.next = add i32 %i, 1 + br i1 %c1, label %bb11, label %bb13 + +bb13: + unreachable +} + +; CHECK-LABEL: @global_sext_scale_user( +; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext +; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1 +; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2 +define void @global_sext_scale_user() nounwind { +entry: + br label %bb11 + +bb11: + %i = phi i32 [ 0, %entry ], [ %i.next, %bb ] + %ii = shl i32 %i, 1 + %ii.ext = sext i32 %ii to i64 + %c0 = icmp eq i32 %i, undef + br i1 %c0, label %bb13, label %bb + +bb: + %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef + %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext + %c1 = icmp ult i8 addrspace(1)* %p, undef + %i.next = add i32 %i, 1 
+ br i1 %c1, label %bb11, label %bb13 + +bb13: + unreachable +}
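
For reference, the behavior the new AddrSpace argument enables can be sketched as a tiny standalone legality check. This is an illustrative sketch only, not the in-tree AMDGPU TargetTransformInfo code: the offset limits simply mirror the expectations encoded in the tests above (a 4095 maximum immediate offset for addrspace(1) and 65535 for addrspace(3)), and all names in it are hypothetical.

// Hypothetical, self-contained sketch of a per-address-space addressing-mode
// legality query of the kind LSR can now make. Offset limits follow the test
// expectations above; this is not the real AMDGPU TTI implementation.
#include <cstdint>

namespace {

enum SketchAddrSpace : unsigned {
  GlobalAS = 1, // addrspace(1) in the tests
  LocalAS = 3   // addrspace(3) in the tests
};

bool isLegalAddressingModeSketch(unsigned AddrSpace, int64_t BaseOffset,
                                 bool HasBaseReg, int64_t Scale) {
  // Only consider simple [base + immediate] modes in this sketch.
  if (!HasBaseReg || Scale != 0)
    return false;

  switch (AddrSpace) {
  case GlobalAS:
    return BaseOffset >= 0 && BaseOffset <= 4095;  // 4096 no longer folds
  case LocalAS:
    return BaseOffset >= 0 && BaseOffset <= 65535; // 65536 no longer folds
  default:
    // ~0u is what LSR passes when it cannot determine an address space
    // (see the getAccessType and getUse changes above); be conservative.
    return BaseOffset == 0;
  }
}

} // end anonymous namespace

With a per-address-space answer like this available, the first test keeps the +4095 offset folded into the global-memory access inside the loop, the +4096 variant instead applies the offset once in the preheader and increments the pointer, and the local-memory tests show the same split at the 65535/65536 boundary.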