Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -526,6 +526,12 @@
   /// \return The width of the largest scalar or vector register type.
   unsigned getRegisterBitWidth(bool Vector) const;
 
+  /// \return True if \p I should be considered for address type promotion.
+  /// \p AllowPromotionWithoutCommonHeader is set to true if promoting \p I
+  /// is profitable without finding other extensions fed by the same input.
+  bool shouldConsiderAddressTypePromotion(
+      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
+
   /// \return The size of a cache line in bytes.
   unsigned getCacheLineSize() const;
 
@@ -800,6 +806,8 @@
                                       Type *Ty) = 0;
   virtual unsigned getNumberOfRegisters(bool Vector) = 0;
   virtual unsigned getRegisterBitWidth(bool Vector) = 0;
+  virtual bool shouldConsiderAddressTypePromotion(
+      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
   virtual unsigned getCacheLineSize() = 0;
   virtual unsigned getPrefetchDistance() = 0;
   virtual unsigned getMinPrefetchStride() = 0;
@@ -1026,7 +1034,11 @@
   unsigned getRegisterBitWidth(bool Vector) override {
     return Impl.getRegisterBitWidth(Vector);
   }
-
+  bool shouldConsiderAddressTypePromotion(
+      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
+    return Impl.shouldConsiderAddressTypePromotion(
+        I, AllowPromotionWithoutCommonHeader);
+  }
   unsigned getCacheLineSize() override {
     return Impl.getCacheLineSize();
   }
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -303,6 +303,13 @@
 
   unsigned getRegisterBitWidth(bool Vector) { return 32; }
 
+  bool
+  shouldConsiderAddressTypePromotion(const Instruction &I,
+                                     bool &AllowPromotionWithoutCommonHeader) {
+    AllowPromotionWithoutCommonHeader = false;
+    return false;
+  }
+
   unsigned getCacheLineSize() { return 0; }
 
   unsigned getPrefetchDistance() { return 0; }
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -269,6 +269,12 @@
   return TTIImpl->getRegisterBitWidth(Vector);
 }
 
+bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
+    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
+  return TTIImpl->shouldConsiderAddressTypePromotion(
+      I, AllowPromotionWithoutCommonHeader);
+}
+
 unsigned TargetTransformInfo::getCacheLineSize() const {
   return TTIImpl->getCacheLineSize();
 }
Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -138,10 +138,17 @@
     "force-split-store", cl::Hidden, cl::init(false),
     cl::desc("Force store splitting no matter what the target query says."));
 
+static cl::opt<bool>
+EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
+    cl::desc("Enable merging of redundant sexts when one dominates"
+             " the other."), cl::init(true));
+
 namespace {
 typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
 typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
 typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
+typedef SmallVector<Instruction *, 16> SExts;
+typedef DenseMap<Value *, SExts> ValueToSExts;
 class TypePromotionTransaction;
 
   class CodeGenPrepare : public FunctionPass {
@@ -170,6 +177,15 @@
     /// promotion for the current function.
     InstrToOrigTy PromotedInsts;
 
+    /// Keep track of instructions removed during promotion.
+    SetOfInstrs RemovedInsts;
+
+    /// Keep track of sext chains based on their initial value.
+    DenseMap<Value *, Instruction *> SeenChainsForSExt;
+
+    /// Keep track of promoted SExts.
+    ValueToSExts ValToSExtendedUses;
+
     /// True if CFG is modified in any way.
     bool ModifiedDT;
 
@@ -211,7 +227,7 @@
                               Type *AccessTy, unsigned AS);
     bool optimizeInlineAsmInst(CallInst *CS);
     bool optimizeCallInst(CallInst *CI, bool& ModifiedDT);
-    bool moveExtToFormExtLoad(Instruction *&I);
+    bool optimizeExt(Instruction *&I);
     bool optimizeExtUses(Instruction *I);
     bool optimizeLoadExt(LoadInst *I);
     bool optimizeSelectInst(SelectInst *SI);
@@ -226,6 +242,12 @@
                           const SmallVectorImpl<Instruction *> &Exts,
                           SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
                           unsigned CreatedInstsCost = 0);
+    bool mergeSExts(Function &F);
+    bool performAddressTypePromotion(
+        Instruction *&Inst,
+        bool AllowPromotionWithoutCommonHeader,
+        bool HasPromoted, TypePromotionTransaction &TPT,
+        SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
     bool splitBranchCondition(Function &F);
     bool simplifyOffsetableRelocate(Instruction &I);
     bool splitIndirectCriticalEdges(Function &F);
@@ -310,6 +332,9 @@
   bool MadeChange = true;
   while (MadeChange) {
     MadeChange = false;
+    SeenChainsForSExt.clear();
+    ValToSExtendedUses.clear();
+    RemovedInsts.clear();
     for (Function::iterator I = F.begin(); I != F.end(); ) {
       BasicBlock *BB = &*I++;
       bool ModifiedDTOnIteration = false;
@@ -319,6 +344,13 @@
       if (ModifiedDTOnIteration)
         break;
     }
+    if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
+      MadeChange |= mergeSExts(F);
+
+    // Really free removed instructions during promotion.
+    for (Instruction *I : RemovedInsts)
+      delete I;
+
     EverMadeChange |= MadeChange;
   }
 
@@ -2793,25 +2825,30 @@
     OperandsHider Hider;
     /// Keep track of the uses replaced, if any.
     UsesReplacer *Replacer;
+    /// Keep track of instructions removed.
+    SetOfInstrs &RemovedInsts;
 
   public:
     /// \brief Remove all reference of \p Inst and optinally replace all its
     /// uses with New.
+    /// \p RemovedInsts Keep track of the instructions removed by this Action.
     /// \pre If !Inst->use_empty(), then New != nullptr
-    InstructionRemover(Instruction *Inst, Value *New = nullptr)
+    InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
+                       Value *New = nullptr)
         : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
-          Replacer(nullptr) {
+          Replacer(nullptr), RemovedInsts(RemovedInsts) {
       if (New)
         Replacer = new UsesReplacer(Inst, New);
       DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
+      RemovedInsts.insert(Inst);
+      /// The instructions removed here will be freed after completing
+      /// optimizeBlock() for all blocks as we need to keep track of the
+      /// removed instructions during promotion.
       Inst->removeFromParent();
     }
 
     ~InstructionRemover() override { delete Replacer; }
 
-    /// \brief Really remove the instruction.
-    void commit() override { delete Inst; }
-
     /// \brief Resurrect the instruction and reassign it to the proper uses if
     /// new value was provided when build this action.
     void undo() override {
@@ -2820,6 +2857,7 @@
       if (Replacer)
         Replacer->undo();
       Hider.undo();
+      RemovedInsts.erase(Inst);
     }
   };
 
@@ -2828,6 +2866,10 @@
   /// The restoration point is a pointer to an action instead of an iterator
   /// because the iterator may be invalidated but not the pointer.
   typedef const TypePromotionAction *ConstRestorationPt;
+
+  TypePromotionTransaction(SetOfInstrs &RemovedInsts)
+      : RemovedInsts(RemovedInsts) {}
+
   /// Advocate every changes made in that transaction.
   void commit();
 
   /// Undo all the changes made after the given point.
@@ -2859,6 +2901,7 @@
   /// The ordered list of actions made so far.
   SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
   typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
+  SetOfInstrs &RemovedInsts;
 };
 
 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
@@ -2870,7 +2913,8 @@
 void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
                                                 Value *NewVal) {
   Actions.push_back(
-      make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
+      make_unique<TypePromotionTransaction::InstructionRemover>(Inst,
+                                                         RemovedInsts, NewVal));
 }
 
 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
@@ -4097,7 +4141,7 @@
   bool IsNumUsesConsensusValid = false;
   SmallVector<Instruction*, 16> AddrModeInsts;
   ExtAddrMode AddrMode;
-  TypePromotionTransaction TPT;
+  TypePromotionTransaction TPT(RemovedInsts);
   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
       TPT.getRestorationPoint();
   while (!worklist.empty()) {
@@ -4492,20 +4536,6 @@
 /// them.
 ///
 /// \return true if some promotion happened, false otherwise.
-///
-/// Example:
-/// \code
-/// %ld = load i32* %addr
-/// %add = add nuw i32 %ld, 4
-/// %zext = zext i32 %add to i64
-/// \endcode
-/// =>
-/// \code
-/// %ld = load i32* %addr
-/// %zext = zext i32 %ld to i64
-/// %add = add nuw i64 %zext, 4
-/// \endcode
-/// Thanks to the promotion, we can match zext(load i32*) to i64.
 bool CodeGenPrepare::tryToPromoteExts(
     TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
     SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
@@ -4601,6 +4631,46 @@
   return Promoted;
 }
 
+/// Merge redundant sexts when one dominates the other.
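+///
+/// E.g., a minimal illustrative sketch (the IR names are not taken from any
+/// test): both sexts extend the same value and the first dominates the
+/// second, so the dominated sext is replaced and removed.
+/// \code
+/// %s1 = sext i32 %a to i64
+/// ...
+/// %s2 = sext i32 %a to i64
+/// %gep = getelementptr inbounds i64, i64* %base, i64 %s2
+/// \endcode
+/// =>
+/// \code
+/// %s1 = sext i32 %a to i64
+/// ...
+/// %gep = getelementptr inbounds i64, i64* %base, i64 %s1
+/// \endcode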
+bool CodeGenPrepare::mergeSExts(Function &F) {
+  DominatorTree DT(F);
+  bool Changed = false;
+  for (auto &Entry : ValToSExtendedUses) {
+    SExts &Insts = Entry.second;
+    SExts CurPts;
+    for (Instruction *Inst : Insts) {
+      if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
+          Inst->getOperand(0) != Entry.first)
+        continue;
+      bool inserted = false;
+      for (auto &Pt : CurPts) {
+        if (DT.dominates(Inst, Pt)) {
+          Pt->replaceAllUsesWith(Inst);
+          RemovedInsts.insert(Pt);
+          Pt->removeFromParent();
+          Pt = Inst;
+          inserted = true;
+          Changed = true;
+          break;
+        }
+        if (!DT.dominates(Pt, Inst))
+          // Give up if we need to merge in a common dominator as the
+          // experiments show it is not profitable.
+          continue;
+        Inst->replaceAllUsesWith(Pt);
+        RemovedInsts.insert(Inst);
+        Inst->removeFromParent();
+        inserted = true;
+        Changed = true;
+        break;
+      }
+      if (!inserted)
+        CurPts.push_back(Inst);
+    }
+  }
+  return Changed;
+}
+
 /// Return true, if an ext(load) can be formed from an extension in
 /// \p MovedExts.
 bool CodeGenPrepare::canFormExtLd(
@@ -4646,49 +4716,164 @@
 /// Move a zext or sext fed by a load into the same basic block as the load,
 /// unless conditions are unfavorable. This allows SelectionDAG to fold the
 /// extend into the load.
-/// \p I[in/out] the extension may be modified during the process if some
-/// promotions apply.
 ///
-bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
-  // ExtLoad formation infrastructure requires TLI to be effective.
+/// E.g.,
+/// \code
+/// %ld = load i32* %addr
+/// %add = add nuw i32 %ld, 4
+/// %zext = zext i32 %add to i64
+/// \endcode
+/// =>
+/// \code
+/// %ld = load i32* %addr
+/// %zext = zext i32 %ld to i64
+/// %add = add nuw i64 %zext, 4
+/// \endcode
+/// Note that the promotion of %add to i64 is done in tryToPromoteExts(),
+/// which allows us to match zext(load i32*) to i64.
+///
+/// Also, try to promote the computations used to obtain a sign-extended
+/// value used in memory accesses.
+/// E.g.,
+/// \code
+/// a = add nsw i32 b, 3
+/// d = sext i32 a to i64
+/// e = getelementptr ..., i64 d
+/// \endcode
+/// =>
+/// \code
+/// f = sext i32 b to i64
+/// a = add nsw i64 f, 3
+/// e = getelementptr ..., i64 a
+/// \endcode
+///
+/// \p Inst[in/out] the extension may be modified during the process if some
+/// promotions apply.
+bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
+  // ExtLoad formation and address type promotion infrastructure requires TLI
+  // to be effective.
   if (!TLI)
     return false;
 
-  // Try to promote a chain of computation if it allows to form
-  // an extended load.
-  TypePromotionTransaction TPT;
+  bool AllowPromotionWithoutCommonHeader = false;
+  /// See if it is an interesting sext operation for the address type
+  /// promotion before trying to promote it, e.g., the ones with the right
+  /// type and used in memory accesses.
+  bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
+      *Inst, AllowPromotionWithoutCommonHeader);
+  TypePromotionTransaction TPT(RemovedInsts);
   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
       TPT.getRestorationPoint();
   SmallVector<Instruction *, 1> Exts;
-  SmallVector<Instruction *, 1> LastMovedExts;
-  Exts.push_back(I);
+  SmallVector<Instruction *, 2> SpeculativelyMovedExts;
+  Exts.push_back(Inst);
 
-  bool HasPromoted = tryToPromoteExts(TPT, Exts, LastMovedExts);
+  bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
 
   // Look for a load being extended.
   LoadInst *LI = nullptr;
-  Instruction *OldExt = I;
-  if (!canFormExtLd(LastMovedExts, LI, I, HasPromoted)) {
-    I = OldExt;
+  Instruction *ExtFedByLoad;
+
+  // Try to promote a chain of computation if it allows to form an extended
+  // load.
+  if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
+    assert(LI && ExtFedByLoad && "Expect a valid load and extension");
+    TPT.commit();
+    // Move the extend into the same block as the load.
+    ExtFedByLoad->removeFromParent();
+    ExtFedByLoad->insertAfter(LI);
+    // CGP does not check if the zext would be speculatively executed when
+    // moved to the same basic block as the load. Preserving its original
+    // location would pessimize the debugging experience, as well as
+    // negatively impact the quality of sample pgo. We don't want to use
+    // "line 0" as that has a size cost in the line-table section and
+    // logically the zext can be seen as part of the load. Therefore we
+    // conservatively reuse the same debug location for the load and the zext.
+    ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
+    ++NumExtsMoved;
+    Inst = ExtFedByLoad;
+    return true;
+  }
+
+  // Continue promoting SExts if the target reported them as considerable for
+  // address type promotion.
+  if (ATPConsiderable &&
+      performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
+                                  HasPromoted, TPT, SpeculativelyMovedExts))
+    return true;
+  else {
     TPT.rollback(LastKnownGood);
     return false;
   }
+}
 
-  // Move the extend into the same block as the load, so that SelectionDAG
-  // can fold it.
-  TPT.commit();
-  I->removeFromParent();
-  I->insertAfter(LI);
-  // CGP does not check if the zext would be speculatively executed when moved
-  // to the same basic block as the load. Preserving its original location would
-  // pessimize the debugging experience, as well as negatively impact the
-  // quality of sample pgo. We don't want to use "line 0" as that has a
-  // size cost in the line-table section and logically the zext can be seen as
-  // part of the load. Therefore we conservatively reuse the same debug location
-  // for the load and the zext.
-  I->setDebugLoc(LI->getDebugLoc());
-  ++NumExtsMoved;
-  return true;
+// Perform address type promotion if doing so is profitable.
+// If AllowPromotionWithoutCommonHeader == false, we should find other sext
+// instructions that sign extended the same initial value. However, if
+// AllowPromotionWithoutCommonHeader == true, we expect promoting the
+// extension to be profitable on its own.
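+//
+// E.g., a minimal illustrative sketch (names not from any test): two sext
+// chains are rooted at the same header %i. The first chain seen is only
+// recorded; once a second chain with the same header is found, both chains
+// are promoted.
+//
+//   %a  = add nsw i32 %i, 1
+//   %s1 = sext i32 %a to i64   ; deferred, recorded in SeenChainsForSExt[%i]
+//   ...
+//   %b  = add nsw i32 %i, 2
+//   %s2 = sext i32 %b to i64   ; same header %i: promote both chains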
+bool CodeGenPrepare::performAddressTypePromotion(
+    Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
+    bool HasPromoted, TypePromotionTransaction &TPT,
+    SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
+  bool Promoted = false;
+  SmallPtrSet<Instruction *, 1> UnhandledExts;
+  bool AllSeenFirst = true;
+  for (auto I : SpeculativelyMovedExts) {
+    Value *HeadOfChain = I->getOperand(0);
+    DenseMap<Value *, Instruction *>::iterator AlreadySeen =
+        SeenChainsForSExt.find(HeadOfChain);
+    // If there is an unhandled SExt which has the same header, try to promote
+    // it as well.
+    if (AlreadySeen != SeenChainsForSExt.end()) {
+      if (AlreadySeen->second != nullptr)
+        UnhandledExts.insert(AlreadySeen->second);
+      AllSeenFirst = false;
+    }
+  }
+
+  if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
+                        SpeculativelyMovedExts.size() == 1)) {
+    TPT.commit();
+    if (HasPromoted)
+      Promoted = true;
+    for (auto I : SpeculativelyMovedExts) {
+      Value *HeadOfChain = I->getOperand(0);
+      SeenChainsForSExt[HeadOfChain] = nullptr;
+      ValToSExtendedUses[HeadOfChain].push_back(I);
+    }
+    // Update Inst as promotion happened.
+    Inst = SpeculativelyMovedExts.pop_back_val();
+  } else {
+    // This is the first chain visited from the header, keep the current chain
+    // as unhandled. Defer promoting this chain until we encounter another
+    // SExt chain derived from the same header.
+    for (auto I : SpeculativelyMovedExts) {
+      Value *HeadOfChain = I->getOperand(0);
+      SeenChainsForSExt[HeadOfChain] = Inst;
+    }
+    return false;
+  }
+
+  if (!AllSeenFirst && !UnhandledExts.empty())
+    for (auto VisitedSExt : UnhandledExts) {
+      if (RemovedInsts.count(VisitedSExt))
+        continue;
+      TypePromotionTransaction TPT(RemovedInsts);
+      SmallVector<Instruction *, 1> Exts;
+      SmallVector<Instruction *, 2> Chains;
+      Exts.push_back(VisitedSExt);
+      bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
+      TPT.commit();
+      if (HasPromoted)
+        Promoted = true;
+      for (auto I : Chains) {
+        Value *HeadOfChain = I->getOperand(0);
+        // Mark this as handled.
+        SeenChainsForSExt[HeadOfChain] = nullptr;
+        ValToSExtendedUses[HeadOfChain].push_back(I);
+      }
+    }
+  return Promoted;
 }
 
 bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
@@ -5802,7 +5987,7 @@
              TargetLowering::TypeExpandInteger) {
       return SinkCast(CI);
     } else {
-      bool MadeChange = moveExtToFormExtLoad(I);
+      bool MadeChange = optimizeExt(I);
       return MadeChange | optimizeExtUses(I);
     }
   }
Index: lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetMachine.cpp
+++ lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -118,7 +118,7 @@
 static cl::opt<bool>
 EnableAddressTypePromotion("aarch64-enable-type-promotion", cl::Hidden,
                            cl::desc("Enable the type promotion pass"),
-                           cl::init(true));
+                           cl::init(false));
 
 static cl::opt<bool>
 EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
Index: lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -121,6 +121,10 @@
                                  ArrayRef<unsigned> Indices, unsigned Alignment,
                                  unsigned AddressSpace);
 
+  bool
+  shouldConsiderAddressTypePromotion(const Instruction &I,
+                                     bool &AllowPromotionWithoutCommonHeader);
+
   unsigned getCacheLineSize();
 
   unsigned getPrefetchDistance();
Index: lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -626,6 +626,38 @@
   return true;
 }
 
+/// See if \p I should be considered for address type promotion. We check if
+/// \p I is a sext with the right type and used in memory accesses. If it is
+/// used in a "complex" getelementptr, we allow it to be promoted without
+/// finding other sext instructions that sign extended the same initial value.
+/// A getelementptr is considered "complex" if it has more than 2 operands.
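+///
+/// E.g., in the illustrative IR below (names not from any test), the sext
+/// feeds a getelementptr with three operands, so it may be promoted even
+/// without other sexts sharing the same initial value:
+/// \code
+/// %idxprom = sext i32 %i to i64
+/// %arrayidx = getelementptr inbounds [8 x i64], [8 x i64]* %p, i64 0, i64 %idxprom
+/// \endcode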
+bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
+    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
+  bool Considerable = false;
+  AllowPromotionWithoutCommonHeader = false;
+  if (!isa<SExtInst>(&I))
+    return false;
+  Type *ConsideredSExtType =
+      Type::getInt64Ty(I.getParent()->getParent()->getContext());
+  if (I.getType() != ConsideredSExtType)
+    return false;
+  // See if the sext is the one with the right type and used in at least one
+  // GetElementPtrInst.
+  for (const User *U : I.users()) {
+    if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
+      Considerable = true;
+      // A getelementptr is considered as "complex" if it has more than 2
+      // operands. We will promote a SExt used in such complex GEP as we
+      // expect some computation to be merged if they are done on 64 bits.
+      if (GEPInst->getNumOperands() > 2) {
+        AllowPromotionWithoutCommonHeader = true;
+        break;
+      }
+    }
+  }
+  return Considerable;
+}
+
 unsigned AArch64TTIImpl::getCacheLineSize() {
   return ST->getCacheLineSize();
 }
Index: test/CodeGen/AArch64/aarch64-codegen-prepare-atp.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/aarch64-codegen-prepare-atp.ll
@@ -0,0 +1,68 @@
+; RUN: opt -codegenprepare < %s -S | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+%struct.match_state = type { i64, i64 }
+
+; %add is also promoted by forking an extra sext.
+define void @promoteTwoOne(i32 %i, i32 %j, i64* %P1, i64* %P2) {
+; CHECK-LABEL: @promoteTwoOne
+; CHECK-LABEL: entry:
+; CHECK: %[[SEXT1:.*]] = sext i32 %i to i64
+; CHECK: %[[SEXT2:.*]] = sext i32 %j to i64
+; CHECK: %add = add nsw i64 %[[SEXT1]], %[[SEXT2]]
+entry:
+  %add = add nsw i32 %i, %j
+  %s = sext i32 %add to i64
+  %addr1 = getelementptr inbounds i64, i64* %P1, i64 %s
+  store i64 %s, i64* %addr1
+  %s2 = sext i32 %i to i64
+  %addr2 = getelementptr inbounds i64, i64* %P2, i64 %s2
+  store i64 %s2, i64* %addr2
+  ret void
+}
+
+; Both %add1 and %add2 are promoted by forking extra sexts.
+define void @promoteTwoTwo(i32 %i, i32 %j, i32 %k, i64* %P1, i64* %P2) {
+; CHECK-LABEL: @promoteTwoTwo
+; CHECK-LABEL: entry:
+; CHECK: %[[SEXT1:.*]] = sext i32 %j to i64
+; CHECK: %[[SEXT2:.*]] = sext i32 %i to i64
+; CHECK: %add1 = add nsw i64 %[[SEXT1]], %[[SEXT2]]
+; CHECK: %[[SEXT3:.*]] = sext i32 %k to i64
+; CHECK: %add2 = add nsw i64 %[[SEXT1]], %[[SEXT3]]
+entry:
+  %add1 = add nsw i32 %j, %i
+  %s = sext i32 %add1 to i64
+  %addr1 = getelementptr inbounds i64, i64* %P1, i64 %s
+  store i64 %s, i64* %addr1
+  %add2 = add nsw i32 %j, %k
+  %s2 = sext i32 %add2 to i64
+  %addr2 = getelementptr inbounds i64, i64* %P2, i64 %s2
+  store i64 %s2, i64* %addr2
+  ret void
+}
+
+define i64 @promoteGEPSunk(i1 %cond, i64* %base, i32 %i) {
+; CHECK-LABEL: @promoteGEPSunk
+; CHECK-LABEL: entry:
+; CHECK: %[[SEXT:.*]] = sext i32 %i to i64
+; CHECK: %add = add nsw i64 %[[SEXT]], 1
+; CHECK: %add2 = add nsw i64 %[[SEXT]], 2
+entry:
+  %add = add nsw i32 %i, 1
+  %s = sext i32 %add to i64
+  %addr = getelementptr inbounds i64, i64* %base, i64 %s
+  %add2 = add nsw i32 %i, 2
+  %s2 = sext i32 %add2 to i64
+  %addr2 = getelementptr inbounds i64, i64* %base, i64 %s2
+  br i1 %cond, label %if.then, label %if.then2
+if.then:
+  %v = load i64, i64* %addr
+  %v2 = load i64, i64* %addr2
+  %r = add i64 %v, %v2
+  ret i64 %r
+if.then2:
+  ret i64 0
+}
Index: test/CodeGen/AArch64/arm64-addr-type-promotion.ll
===================================================================
--- test/CodeGen/AArch64/arm64-addr-type-promotion.ll
+++ test/CodeGen/AArch64/arm64-addr-type-promotion.ll
@@ -10,14 +10,17 @@
 ; CHECK: fullGtU
 ; CHECK: adrp [[PAGE:x[0-9]+]], _block@GOTPAGE
 ; CHECK: ldr [[ADDR:x[0-9]+]], {{\[}}[[PAGE]], _block@GOTPAGEOFF]
+; CHECK: sxtw [[I1:x[0-9]+]], w0
+; CHECK: sxtw [[I2:x[0-9]+]], w1
 ; CHECK-NEXT: ldr [[BLOCKBASE:x[0-9]+]], {{\[}}[[ADDR]]]
-; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]], w0, sxtw]
-; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], w1, sxtw]
+; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]], [[I1]]]
+; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], [[I2]]]
+
 ; CHECK-NEXT: cmp [[BLOCKVAL1]], [[BLOCKVAL2]]
 ; CHECK-NEXT: b.ne
 ; Next BB
-; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], w1, sxtw
-; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], w0, sxtw
+; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], [[I2]]
+; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], [[I1]]
 ; CHECK-NEXT: ldrb [[LOADEDVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #1]
 ; CHECK-NEXT: ldrb [[LOADEDVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #1]
 ; CHECK-NEXT: cmp [[LOADEDVAL1]], [[LOADEDVAL2]]