Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -523,6 +523,9 @@ /// \return The width of the largest scalar or vector register type. unsigned getRegisterBitWidth(bool Vector) const; + /// \brief Return true if it should be considered for address type promotion. + bool shouldConsiderAddressTypePromotion(Instruction *I) const; + /// \return The size of a cache line in bytes. unsigned getCacheLineSize() const; @@ -797,6 +800,7 @@ Type *Ty) = 0; virtual unsigned getNumberOfRegisters(bool Vector) = 0; virtual unsigned getRegisterBitWidth(bool Vector) = 0; + virtual bool shouldConsiderAddressTypePromotion(Instruction *I) = 0; virtual unsigned getCacheLineSize() = 0; virtual unsigned getPrefetchDistance() = 0; virtual unsigned getMinPrefetchStride() = 0; @@ -1024,6 +1028,10 @@ return Impl.getRegisterBitWidth(Vector); } + bool shouldConsiderAddressTypePromotion(Instruction *I) override { + return Impl.shouldConsiderAddressTypePromotion(I); + } + unsigned getCacheLineSize() override { return Impl.getCacheLineSize(); } Index: include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- include/llvm/Analysis/TargetTransformInfoImpl.h +++ include/llvm/Analysis/TargetTransformInfoImpl.h @@ -303,6 +303,8 @@ unsigned getRegisterBitWidth(bool Vector) { return 32; } + bool shouldConsiderAddressTypePromotion(Instruction *I) { return false; } + unsigned getCacheLineSize() { return 0; } unsigned getPrefetchDistance() { return 0; } Index: lib/Analysis/TargetTransformInfo.cpp =================================================================== --- lib/Analysis/TargetTransformInfo.cpp +++ lib/Analysis/TargetTransformInfo.cpp @@ -269,6 +269,11 @@ return TTIImpl->getRegisterBitWidth(Vector); } +bool TargetTransformInfo::shouldConsiderAddressTypePromotion( + 
Instruction *I) const { + return TTIImpl->shouldConsiderAddressTypePromotion(I); +} + unsigned TargetTransformInfo::getCacheLineSize() const { return TTIImpl->getCacheLineSize(); } Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -142,6 +142,8 @@ typedef SmallPtrSet SetOfInstrs; typedef PointerIntPair TypeIsSExt; typedef DenseMap InstrToOrigTy; +typedef SmallVector SExts; +typedef DenseMap ValueToSExts; class TypePromotionTransaction; class CodeGenPrepare : public FunctionPass { @@ -170,6 +172,15 @@ /// promotion for the current function. InstrToOrigTy PromotedInsts; + /// Keep track of extensions removed. + SetOfInstrs RemovedExtInsts; + + /// Keep track of SExt chains based on their initial value. + DenseMap SeenChainsForSExt; + + /// Keep track of SExt promoted. + ValueToSExts ValToSExtendedUses; + /// True if CFG is modified in any way. bool ModifiedDT; @@ -211,7 +222,7 @@ Type *AccessTy, unsigned AS); bool optimizeInlineAsmInst(CallInst *CS); bool optimizeCallInst(CallInst *CI, bool& ModifiedDT); - bool moveExtToFormExtLoad(Instruction *&I); + bool optimizeExt(Instruction *&I); bool optimizeExtUses(Instruction *I); bool optimizeLoadExt(LoadInst *I); bool optimizeSelectInst(SelectInst *SI); @@ -226,6 +237,7 @@ const SmallVectorImpl &Exts, SmallVectorImpl &ProfitablyMovedExts, unsigned CreatedInstsCost = 0); + bool mergeSExts(Function &F); bool splitBranchCondition(Function &F); bool simplifyOffsetableRelocate(Instruction &I); bool splitIndirectCriticalEdges(Function &F); @@ -310,6 +322,9 @@ bool MadeChange = true; while (MadeChange) { MadeChange = false; + SeenChainsForSExt.clear(); + ValToSExtendedUses.clear(); + RemovedExtInsts.clear(); for (Function::iterator I = F.begin(); I != F.end(); ) { BasicBlock *BB = &*I++; bool ModifiedDTOnIteration = false; @@ -319,6 +334,9 @@ if (ModifiedDTOnIteration) break; } + if 
(!ValToSExtendedUses.empty()) + MadeChange |= mergeSExts(F); + EverMadeChange |= MadeChange; } @@ -1020,7 +1038,7 @@ } /// SinkCast - Sink the specified cast instruction into its user blocks -static bool SinkCast(CastInst *CI) { +static bool SinkCast(CastInst *CI, SetOfInstrs &RemovedExtInsts) { BasicBlock *DefBB = CI->getParent(); /// InsertedCasts - Only insert a cast in each block once. @@ -1075,6 +1093,7 @@ // If we removed all uses, nuke the cast. if (CI->use_empty()) { CI->eraseFromParent(); + RemovedExtInsts.insert(CI); MadeChange = true; } @@ -1087,7 +1106,9 @@ /// /// Return true if any changes are made. /// -static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, +static bool OptimizeNoopCopyExpression(CastInst *CI, + SetOfInstrs &RemovedExtInsts, + const TargetLowering &TLI, const DataLayout &DL) { // Sink only "cheap" (or nop) address-space casts. This is a weaker condition // than sinking only nop casts, but is helpful on some platforms. @@ -1123,7 +1144,7 @@ if (SrcVT != DstVT) return false; - return SinkCast(CI); + return SinkCast(CI, RemovedExtInsts); } /// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if @@ -2793,17 +2814,21 @@ OperandsHider Hider; /// Keep track of the uses replaced, if any. UsesReplacer *Replacer; + /// Keep track of instructions removed. + SetOfInstrs &RemovedExtInsts; public: /// \brief Remove all reference of \p Inst and optinally replace all its /// uses with New. 
/// \pre If !Inst->use_empty(), then New != nullptr - InstructionRemover(Instruction *Inst, Value *New = nullptr) + InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedExtInsts, + Value *New = nullptr) : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst), - Replacer(nullptr) { + Replacer(nullptr), RemovedExtInsts(RemovedExtInsts) { if (New) Replacer = new UsesReplacer(Inst, New); DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n"); + RemovedExtInsts.insert(Inst); Inst->removeFromParent(); } @@ -2820,6 +2845,7 @@ if (Replacer) Replacer->undo(); Hider.undo(); + RemovedExtInsts.erase(Inst); } }; @@ -2828,6 +2854,10 @@ /// The restoration point is a pointer to an action instead of an iterator /// because the iterator may be invalidated but not the pointer. typedef const TypePromotionAction *ConstRestorationPt; + + TypePromotionTransaction(SetOfInstrs &RemovedExtInsts) + : RemovedExtInsts(RemovedExtInsts) {} + /// Advocate every changes made in that transaction. void commit(); /// Undo all the changes made after the given point. @@ -2859,6 +2889,7 @@ /// The ordered list of actions made so far. SmallVector, 16> Actions; typedef SmallVectorImpl>::iterator CommitPt; + SetOfInstrs &RemovedExtInsts; }; void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, @@ -2869,8 +2900,8 @@ void TypePromotionTransaction::eraseInstruction(Instruction *Inst, Value *NewVal) { - Actions.push_back( - make_unique(Inst, NewVal)); + Actions.push_back(make_unique( + Inst, RemovedExtInsts, NewVal)); } void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, @@ -4097,7 +4128,7 @@ bool IsNumUsesConsensusValid = false; SmallVector AddrModeInsts; ExtAddrMode AddrMode; - TypePromotionTransaction TPT; + TypePromotionTransaction TPT(RemovedExtInsts); TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); while (!worklist.empty()) { @@ -4492,20 +4523,6 @@ /// them. 
/// /// \return true if some promotion happened, false otherwise. -/// -/// Example: -/// \code -/// %ld = load i32* %addr -/// %add = add nuw i32 %ld, 4 -/// %zext = zext i32 %add to i64 -/// \endcode -/// => -/// \code -/// %ld = load i32* %addr -/// %zext = zext i32 %ld to i64 -/// %add = add nuw i64 %zext, 4 -/// \endcode -/// Thanks to the promotion, we can match zext(load i32*) to i64. bool CodeGenPrepare::tryToPromoteExts( TypePromotionTransaction &TPT, const SmallVectorImpl &Exts, SmallVectorImpl &ProfitablyMovedExts, @@ -4601,6 +4618,47 @@ return Promoted; } +bool CodeGenPrepare::mergeSExts(Function &F) { + DominatorTree DT(F); + bool Changed = false; + for (auto &Entry : ValToSExtendedUses) { + SExts &Insts = Entry.second; + SExts CurPts; + for (Instruction *Inst : Insts) { + if (RemovedExtInsts.count(Inst) || !isa(Inst) || + Inst->getOperand(0) != Entry.first) + continue; + + bool inserted = false; + for (auto &Pt : CurPts) { + if (DT.dominates(Inst, Pt)) { + Pt->replaceAllUsesWith(Inst); + RemovedExtInsts.insert(Pt); + Pt->eraseFromParent(); + Pt = Inst; + inserted = true; + Changed = true; + break; + } + if (!DT.dominates(Pt, Inst)) + // Give up if we need to merge in a common dominator as the + // experiments show it is not profitable. + continue; + + Inst->replaceAllUsesWith(Pt); + RemovedExtInsts.insert(Inst); + Inst->eraseFromParent(); + inserted = true; + Changed = true; + break; + } + if (!inserted) + CurPts.push_back(Inst); + } + } + return Changed; +} + /// Return true, if an ext(load) can be formed from an extension in /// \p MovedExts. bool CodeGenPrepare::canFormExtLd( @@ -4646,49 +4704,169 @@ /// Move a zext or sext fed by a load into the same basic block as the load, /// unless conditions are unfavorable. This allows SelectionDAG to fold the /// extend into the load. -/// \p I[in/out] the extension may be modified during the process if some -/// promotions apply. 
/// -bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) { - // ExtLoad formation infrastructure requires TLI to be effective. +/// E.g., +/// \code +/// %ld = load i32* %addr +/// %add = add nuw i32 %ld, 4 +/// %zext = zext i32 %add to i64 +/// \endcode +/// => +/// \code +/// %ld = load i32* %addr +/// %zext = zext i32 %ld to i64 +/// %add = add nuw i64 %zext, 4 +/// \endcode +/// Thanks to the code motion, we can match zext(load i32*) to i64. +/// +/// Also, try to promote the computations used to obtain a sign extended +/// value used into memory accesses. +/// E.g., +/// \code +/// a = add nsw i32 b, 3 +/// d = sext i32 a to i64 +/// e = getelementptr ..., i64 d +/// \endcode +/// => +/// \code +/// f = sext i32 b to i64 +/// a = add nsw i64 f, 3 +/// e = getelementptr ..., i64 a +/// \endcode +/// +/// \p Inst[in/out] the extension may be modified during the process if some +/// promotions apply. +bool CodeGenPrepare::optimizeExt(Instruction *&Inst) { + // ExtLoad formation and address type promotion infrastructure requires TLI to + // be effective. if (!TLI) return false; - // Try to promote a chain of computation if it allows to form - // an extended load. - TypePromotionTransaction TPT; + bool Changed = false; + bool IsUsedInComplextGEP = false; + + /// See if it is an interesting sext operations for the address type + /// promotion before trying to promote it, i.e., the ones with the right + /// type and used in memory accesses. + bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(Inst); + if (ATPConsiderable) { + for (const User *U : Inst->users()) { + if (const Instruction *I = dyn_cast(U)) + // A getelementptr is considered as "complex" if it has more than 2 + // operands. We will promote a SExt used in such complex GEP as we + // expect some computation to be merged if they are done on 64 bits. 
+ if (I->getNumOperands() > 2) { + IsUsedInComplextGEP = true; + break; + } + } + } + + TypePromotionTransaction TPT(RemovedExtInsts); TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); SmallVector Exts; SmallVector LastMovedExts; - Exts.push_back(I); + Exts.push_back(Inst); bool HasPromoted = tryToPromoteExts(TPT, Exts, LastMovedExts); // Look for a load being extended. LoadInst *LI = nullptr; - Instruction *OldExt = I; - if (!canFormExtLd(LastMovedExts, LI, I, HasPromoted)) { - I = OldExt; + Instruction *ExtFedByLoad; + + // Try to promote a chain of computation if it allows to form + // an extended load. + if (canFormExtLd(LastMovedExts, LI, ExtFedByLoad, HasPromoted)) { + assert(LI && ExtFedByLoad && "Expect a valid load and extension"); + TPT.commit(); + // Move the extend into the same block as the load + ExtFedByLoad->removeFromParent(); + ExtFedByLoad->insertAfter(LI); + // CGP does not check if the zext would be speculatively executed when moved + // to the same basic block as the load. Preserving its original location + // would pessimize the debugging experience, as well as negatively impact + // the quality of sample pgo. We don't want to use "line 0" as that has a + // size cost in the line-table section and logically the zext can be seen as + // part of the load. Therefore we conservatively reuse the same debug + // location for the load and the zext. + ExtFedByLoad->setDebugLoc(LI->getDebugLoc()); + ++NumExtsMoved; + Changed = true; + Inst = ExtFedByLoad; + LastKnownGood = TPT.getRestorationPoint(); + ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(Inst); + } + + if (!ATPConsiderable) { TPT.rollback(LastKnownGood); - return false; + return Changed; } - // Move the extend into the same block as the load, so that SelectionDAG - // can fold it. 
- TPT.commit(); - I->removeFromParent(); - I->insertAfter(LI); - // CGP does not check if the zext would be speculatively executed when moved - // to the same basic block as the load. Preserving its original location would - // pessimize the debugging experience, as well as negatively impact the - // quality of sample pgo. We don't want to use "line 0" as that has a - // size cost in the line-table section and logically the zext can be seen as - // part of the load. Therefore we conservatively reuse the same debug location - // for the load and the zext. - I->setDebugLoc(LI->getDebugLoc()); - ++NumExtsMoved; - return true; + // Continue promoting SExts in cases : + // 1. SExt is used in a getelementptr with more than 2 operand => + // likely we can merge some computation if they are done on 64 bits. + // 2. The header of the SExt chain is used as a header of other SExt chains => + // code sharing is possible. + SmallPtrSet UnhandledExts; + bool AllSeenFirst = true; + for (auto I : LastMovedExts) { + Value *HeadOfChain = I->getOperand(0); + DenseMap::iterator AlreadySeen = + SeenChainsForSExt.find(HeadOfChain); + + // If there is an unhandled SExt which has the same header, try to promote + // it as well. + if (AlreadySeen != SeenChainsForSExt.end()) { + if (AlreadySeen->second != nullptr) + UnhandledExts.insert(AlreadySeen->second); + AllSeenFirst = false; + } + } + + if (!AllSeenFirst || (IsUsedInComplextGEP && LastMovedExts.size() == 1)) { + TPT.commit(); + if (HasPromoted) + Changed = true; + for (auto I : LastMovedExts) { + Value *HeadOfChain = I->getOperand(0); + SeenChainsForSExt[HeadOfChain] = nullptr; + ValToSExtendedUses[HeadOfChain].push_back(I); + } + // Update Inst as promotion happen. + Inst = LastMovedExts.pop_back_val(); + } else { + // This is the first chain visited from the header, keep the current chain + // as unhandled. 
+ for (auto I : LastMovedExts) { + Value *HeadOfChain = I->getOperand(0); + SeenChainsForSExt[HeadOfChain] = Inst; + } + // Defer to promote this until we encounter another SExt chain derived from + // the same header. + TPT.rollback(LastKnownGood); + } + + if (!AllSeenFirst && !UnhandledExts.empty()) + for (auto VisitedSExt : UnhandledExts) { + if (RemovedExtInsts.count(VisitedSExt)) + continue; + TypePromotionTransaction TPT(RemovedExtInsts); + SmallVector Exts; + SmallVector Chains; + Exts.push_back(VisitedSExt); + bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains); + TPT.commit(); + if (HasPromoted) + Changed = true; + for (auto I : Chains) { + // Mark this as handled. + Value *HeadOfChain = I->getOperand(0); + SeenChainsForSExt[HeadOfChain] = nullptr; + ValToSExtendedUses[HeadOfChain].push_back(I); + } + } + return Changed; } bool CodeGenPrepare::optimizeExtUses(Instruction *I) { @@ -5790,7 +5968,7 @@ if (isa(CI->getOperand(0))) return false; - if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL)) + if (TLI && OptimizeNoopCopyExpression(CI, RemovedExtInsts, *TLI, *DL)) return true; if (isa(I) || isa(I)) { @@ -5800,9 +5978,9 @@ TLI->getTypeAction(CI->getContext(), TLI->getValueType(*DL, CI->getType())) == TargetLowering::TypeExpandInteger) { - return SinkCast(CI); + return SinkCast(CI, RemovedExtInsts); } else { - bool MadeChange = moveExtToFormExtLoad(I); + bool MadeChange = optimizeExt(I); return MadeChange | optimizeExtUses(I); } } Index: lib/Target/AArch64/AArch64TargetMachine.cpp =================================================================== --- lib/Target/AArch64/AArch64TargetMachine.cpp +++ lib/Target/AArch64/AArch64TargetMachine.cpp @@ -118,7 +118,7 @@ static cl::opt EnableAddressTypePromotion("aarch64-enable-type-promotion", cl::Hidden, cl::desc("Enable the type promotion pass"), - cl::init(true)); + cl::init(false)); static cl::opt EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden, Index: lib/Target/AArch64/AArch64TargetTransformInfo.h 
=================================================================== --- lib/Target/AArch64/AArch64TargetTransformInfo.h +++ lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -121,6 +121,8 @@ ArrayRef Indices, unsigned Alignment, unsigned AddressSpace); + bool shouldConsiderAddressTypePromotion(Instruction *I); + unsigned getCacheLineSize(); unsigned getPrefetchDistance(); Index: lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -630,6 +630,19 @@ return true; } +bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(Instruction *I) { + if (!isa(I)) + return false; + Type *ConsideredSExtType = + Type::getInt64Ty(I->getParent()->getParent()->getContext()); + if (I->getType() != ConsideredSExtType) + return false; + for (const User *U : I->users()) + if (isa(U)) + return true; + return false; +} + unsigned AArch64TTIImpl::getCacheLineSize() { return ST->getCacheLineSize(); } Index: test/CodeGen/AArch64/aarch64-codegen-prepare-atp.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/aarch64-codegen-prepare-atp.ll @@ -0,0 +1,68 @@ +; RUN: opt -codegenprepare < %s -S | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +%struct.match_state = type { i64, i64 } + +; %add is also promoted by forking an extra sext. 
+define void @promoteTwoOne(i32 %i, i32 %j, i64* %P1, i64* %P2 ) { +; CHECK-LABEL: @promoteTwoOne +; CHECK-LABEL: entry: +; CHECK: %[[SEXT1:.*]] = sext i32 %i to i64 +; CHECK: %[[SEXT2:.*]] = sext i32 %j to i64 +; CHECK: %add = add nsw i64 %[[SEXT1]], %[[SEXT2]] +entry: + %add = add nsw i32 %i, %j + %s = sext i32 %add to i64 + %addr1 = getelementptr inbounds i64, i64* %P1, i64 %s + store i64 %s, i64* %addr1 + %s2 = sext i32 %i to i64 + %addr2 = getelementptr inbounds i64, i64* %P2, i64 %s2 + store i64 %s2, i64* %addr2 + ret void +} + +; Both %add1 and %add2 are promoted by forking extra sexts. +define void @promoteTwoTwo(i32 %i, i32 %j, i32 %k, i64* %P1, i64* %P2) { +; CHECK-LABEL: @promoteTwoTwo +; CHECK-LABEL:entry: +; CHECK: %[[SEXT1:.*]] = sext i32 %j to i64 +; CHECK: %[[SEXT2:.*]] = sext i32 %i to i64 +; CHECK: %add1 = add nsw i64 %[[SEXT1]], %[[SEXT2]] +; CHECK: %[[SEXT3:.*]] = sext i32 %k to i64 +; CHECK: %add2 = add nsw i64 %[[SEXT1]], %[[SEXT3]] +entry: + %add1 = add nsw i32 %j, %i + %s = sext i32 %add1 to i64 + %addr1 = getelementptr inbounds i64, i64* %P1, i64 %s + store i64 %s, i64* %addr1 + %add2 = add nsw i32 %j, %k + %s2 = sext i32 %add2 to i64 + %addr2 = getelementptr inbounds i64, i64* %P2, i64 %s2 + store i64 %s2, i64* %addr2 + ret void +} + +define i64 @promoteGEPSunk(i1 %cond, i64* %base, i32 %i) { +; CHECK-LABEL: @promoteGEPSunk +; CHECK-LABEL: entry: +; CHECK: %[[SEXT:.*]] = sext i32 %i to i64 +; CHECK: %add = add nsw i64 %[[SEXT]], 1 +; CHECK: %add2 = add nsw i64 %[[SEXT]], 2 +entry: + %add = add nsw i32 %i, 1 + %s = sext i32 %add to i64 + %addr = getelementptr inbounds i64, i64* %base, i64 %s + %add2 = add nsw i32 %i, 2 + %s2 = sext i32 %add2 to i64 + %addr2 = getelementptr inbounds i64, i64* %base, i64 %s2 + br i1 %cond, label %if.then, label %if.then2 +if.then: + %v = load i64, i64* %addr + %v2 = load i64, i64* %addr2 + %r = add i64 %v, %v2 + ret i64 %r +if.then2: + ret i64 0; +} Index: test/CodeGen/AArch64/arm64-addr-type-promotion.ll 
=================================================================== --- test/CodeGen/AArch64/arm64-addr-type-promotion.ll +++ test/CodeGen/AArch64/arm64-addr-type-promotion.ll @@ -10,14 +10,17 @@ ; CHECK: fullGtU ; CHECK: adrp [[PAGE:x[0-9]+]], _block@GOTPAGE ; CHECK: ldr [[ADDR:x[0-9]+]], {{\[}}[[PAGE]], _block@GOTPAGEOFF] +; CHECK: sxtw [[I1:x[0-9]+]], w0 +; CHECK: sxtw [[I2:x[0-9]+]], w1 ; CHECK-NEXT: ldr [[BLOCKBASE:x[0-9]+]], {{\[}}[[ADDR]]] -; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]], w0, sxtw] -; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], w1, sxtw] +; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]], [[I1]]] +; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], [[I2]]] + ; CHECK-NEXT: cmp [[BLOCKVAL1]], [[BLOCKVAL2]] ; CHECK-NEXT: b.ne ; Next BB -; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], w1, sxtw -; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], w0, sxtw +; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], [[I2]] +; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], [[I1]] ; CHECK-NEXT: ldrb [[LOADEDVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #1] ; CHECK-NEXT: ldrb [[LOADEDVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #1] ; CHECK-NEXT: cmp [[LOADEDVAL1]], [[LOADEDVAL2]]