Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -212,10 +212,12 @@ bool dupRetToEnableTailCallOpts(BasicBlock *BB); bool placeDbgValues(Function &F); bool sinkAndCmp(Function &F); - bool extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, - Instruction *&Inst, - const SmallVectorImpl &Exts, - unsigned CreatedInstCost); + bool canFormExtLd(const SmallVectorImpl &LastMovedExts, + LoadInst *&LI, Instruction *&Inst, bool HasPromoted); + bool tryToPromoteExts(TypePromotionTransaction &TPT, + const SmallVectorImpl &Exts, + SmallVectorImpl &LastMovedExts, + unsigned CreatedInstsCost = 0); bool splitBranchCondition(Function &F); bool simplifyOffsetableRelocate(Instruction &I); }; @@ -4181,14 +4183,14 @@ return MadeChange; } -/// \brief Check if all the uses of \p Inst are equivalent (or free) zero or +/// \brief Check if all the uses of \p Val are equivalent (or free) zero or /// sign extensions. -static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) { - assert(!Inst->use_empty() && "Input must have at least one use"); - const Instruction *FirstUser = cast(*Inst->user_begin()); +static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) { + assert(!Val->use_empty() && "Input must have at least one use"); + const Instruction *FirstUser = cast(*Val->user_begin()); bool IsSExt = isa(FirstUser); Type *ExtTy = FirstUser->getType(); - for (const User *U : Inst->users()) { + for (const User *U : Val->users()) { const Instruction *UI = cast(U); if ((IsSExt && !isa(UI)) || (!IsSExt && !isa(UI))) return false; @@ -4198,11 +4200,11 @@ continue; // If IsSExt is true, we are in this situation: - // a = Inst + // a = Val // b = sext ty1 a to ty2 // c = sext ty1 a to ty3 // Assuming ty2 is shorter than ty3, this could be turned into: - // a = Inst + // a = Val // b = sext ty1 a to ty2 // c = sext ty2 b to ty3 // However, the last sext is not free. @@ -4229,16 +4231,12 @@ return true; } -/// \brief Try to form ExtLd by promoting \p Exts until they reach a -/// load instruction. -/// If an ext(load) can be formed, it is returned via \p LI for the load -/// and \p Inst for the extension. -/// Otherwise LI == nullptr and Inst == nullptr. -/// When some promotion happened, \p TPT contains the proper state to -/// revert them. +/// \brief Try to promote \p extensions in /p Exts until they reach an +/// unpromotable/unprofitable point. Save the last extensions which reach to an +/// unpromotable/unprofitable point, in \p LastMovedExts. When some promotion +/// happened, \p TPT contains the proper state to revert them. /// -/// \return true when promoting was necessary to expose the ext(load) -/// opportunity, false otherwise. +/// \return true if some promotion happened, false otherwise. /// /// Example: /// \code @@ -4253,27 +4251,33 @@ /// %add = add nuw i64 %zext, 4 /// \encode /// Thanks to the promotion, we can match zext(load i32*) to i64. -bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT, - LoadInst *&LI, Instruction *&Inst, - const SmallVectorImpl &Exts, - unsigned CreatedInstsCost = 0) { - // Iterate over all the extensions to see if one form an ext(load). +bool CodeGenPrepare::tryToPromoteExts( + TypePromotionTransaction &TPT, const SmallVectorImpl &Exts, + SmallVectorImpl &LastMovedExts, unsigned CreatedInstsCost) { + bool Promoted = false; + + // Iterate over all the extensions to try to promote them. for (auto I : Exts) { - // Check if we directly have ext(load). - if ((LI = dyn_cast(I->getOperand(0)))) { - Inst = I; - // No promotion happened here. - return false; + // Early check if we directly have ext(load). + if (isa(I->getOperand(0))) { + LastMovedExts.push_back(I); + continue; } + // Check whether or not we want to do any promotion. if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion) - continue; + return false; + // Get the action to perform the promotion. - TypePromotionHelper::Action TPH = TypePromotionHelper::getAction( - I, InsertedInsts, *TLI, PromotedInsts); + TypePromotionHelper::Action TPH = + TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts); // Check if we can promote. - if (!TPH) + if (!TPH) { + // Save the extension which cannot move up through its operand. + LastMovedExts.push_back(I); continue; + } + // Save the current state. TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); @@ -4297,86 +4301,110 @@ if (!StressExtLdPromotion && (TotalCreatedInstsCost > 1 || !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) { - // The promotion is not profitable, rollback to the previous state. + // The promotion is not profitable, rollback to the previous state, and + // save the extension as a last extension which reach to an unprofitable + // point. We can move the extension through promotion until this point. TPT.rollback(LastKnownGood); + LastMovedExts.push_back(I); continue; } + // The promotion is profitable. - // Check if it exposes an ext(load). - (void)extLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost); - if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost || - // If we have created a new extension, i.e., now we have two - // extensions. We must make sure one of them is merged with - // the load, otherwise we may degrade the code quality. - (LI->hasOneUse() || hasSameExtUse(LI, *TLI)))) - // Promotion happened. - return true; - // If this does not help to expose an ext(load) then, rollback. - TPT.rollback(LastKnownGood); + Promoted = true; + + // Continue promoting NewExts until they reach an unpromotable/unprofitable + // instruction. + SmallVector NewlyMovedExts; + (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, + TotalCreatedInstsCost); + bool NewPromoted = false; + for (auto ExtInst : NewlyMovedExts) { + Instruction *MovedExt = cast(ExtInst); + Value *ExtOperand = MovedExt->getOperand(0); + if (isa(ExtOperand) && + !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost || + (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI)))) + continue; + LastMovedExts.push_back(MovedExt); + NewPromoted = true; + } + if (!NewPromoted) { + TPT.rollback(LastKnownGood); + LastMovedExts.push_back(I); + } } - // None of the extension can form an ext(load). - LI = nullptr; - Inst = nullptr; - return false; + return Promoted; } -/// Move a zext or sext fed by a load into the same basic block as the load, -/// unless conditions are unfavorable. This allows SelectionDAG to fold the -/// extend into the load. -/// \p I[in/out] the extension may be modified during the process if some -/// promotions apply. -/// -bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) { - // ExtLoad formation infrastructure requires TLI to be effective. - if (!TLI) - return false; +/// Return true, if an ext(load) can be formed from an extension in +/// \p LastMovedExts. +bool CodeGenPrepare::canFormExtLd( + const SmallVectorImpl &LastMovedExts, LoadInst *&LI, + Instruction *&Inst, bool HasPromoted) { + for (auto *PromotedExtInst : LastMovedExts) { + if (isa(PromotedExtInst->getOperand(0))) { + LI = cast(PromotedExtInst->getOperand(0)); + Inst = PromotedExtInst; + break; + } + } - // Try to promote a chain of computation if it allows to form - // an extended load. - TypePromotionTransaction TPT; - TypePromotionTransaction::ConstRestorationPt LastKnownGood = - TPT.getRestorationPoint(); - SmallVector Exts; - Exts.push_back(I); - // Look for a load being extended. - LoadInst *LI = nullptr; - Instruction *OldExt = I; - bool HasPromoted = extLdPromotion(TPT, LI, I, Exts); - if (!LI || !I) { - assert(!HasPromoted && !LI && "If we did not match any load instruction " - "the code must remain the same"); - I = OldExt; + if (!LI) return false; - } // If they're already in the same block, there's nothing to do. // Make the cheap checks first if we did not promote. // If we promoted, we need to check if it is indeed profitable. - if (!HasPromoted && LI->getParent() == I->getParent()) + if (!HasPromoted && LI->getParent() == Inst->getParent()) return false; - EVT VT = TLI->getValueType(*DL, I->getType()); + EVT VT = TLI->getValueType(*DL, Inst->getType()); EVT LoadVT = TLI->getValueType(*DL, LI->getType()); // If the load has other users and the truncate is not free, this probably // isn't worthwhile. - if (!LI->hasOneUse() && - (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) && - !TLI->isTruncateFree(I->getType(), LI->getType())) { - I = OldExt; - TPT.rollback(LastKnownGood); + if (!LI->hasOneUse() && (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) && + !TLI->isTruncateFree(Inst->getType(), LI->getType())) return false; - } // Check whether the target supports casts folded into loads. unsigned LType; - if (isa(I)) + if (isa(Inst)) LType = ISD::ZEXTLOAD; else { - assert(isa(I) && "Unexpected ext type!"); + assert(isa(Inst) && "Unexpected ext type!"); LType = ISD::SEXTLOAD; } - if (!TLI->isLoadExtLegal(LType, VT, LoadVT)) { + + return TLI->isLoadExtLegal(LType, VT, LoadVT); +} + +/// Move a zext or sext fed by a load into the same basic block as the load, +/// unless conditions are unfavorable. This allows SelectionDAG to fold the +/// extend into the load. +/// \p I[in/out] the extension may be modified during the process if some +/// promotions apply. +/// +bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) { + // ExtLoad formation infrastructure requires TLI to be effective. + if (!TLI) + return false; + + // Try to promote a chain of computation if it allows to form + // an extended load. + TypePromotionTransaction TPT; + TypePromotionTransaction::ConstRestorationPt LastKnownGood = + TPT.getRestorationPoint(); + SmallVector Exts; + SmallVector LastMovedExts; + Exts.push_back(I); + + bool HasPromoted = tryToPromoteExts(TPT, Exts, LastMovedExts); + + // Look for a load being extended. + LoadInst *LI = nullptr; + Instruction *OldExt = I; + if (!canFormExtLd(LastMovedExts, LI, I, HasPromoted)) { I = OldExt; TPT.rollback(LastKnownGood); return false; @@ -4389,7 +4417,7 @@ I->insertAfter(LI); // CGP does not check if the zext would be speculatively executed when moved // to the same basic block as the load. Preserving its original location would - // pessimize the debugging experience, as well as negatively impact the + // pessimize the debugging experience, as well as negatively impact the // quality of sample pgo. We don't want to use "line 0" as that has a // size cost in the line-table section and logically the zext can be seen as // part of the load. Therefore we conservatively reuse the same debug location Index: test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll =================================================================== --- test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll +++ test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll @@ -258,8 +258,7 @@ ; => We have one zext of %zextld left and we created one sext of %ld2. ; 2. We try to promote the operand of %sextaddza. ; a. This creates one sext of %zexta and one of %zextld -; b. The sext of %zexta does not lead to any load, it stays here, even if it -; could have been combine with the zext of %a. +; b. The sext of %zexta can be combined with the zext of %a. ; c. The sext of %zextld leads to %ld and can be combined with it. This is ; done by promoting %zextld. This is fine with the current heuristic: ; neutral. @@ -281,16 +280,14 @@ ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %addr1 ; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64 ; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64 -; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64 ; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32, i32* %addr2 ; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64 -; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_1]] -; We do not combine this one: see 2.b. -; OPT-NEXT: [[ZEXTA:%[a-zA-Z_0-9-]+]] = zext i8 %a to i32 -; OPT-NEXT: [[SEXTZEXTA:%[a-zA-Z_0-9-]+]] = sext i32 [[ZEXTA]] to i64 -; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTZEXTA]], [[ZEXTLD1_3]] +; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64 +; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_3]] +; OPT-NEXT: [[ZEXTLD1_4:%[a-zA-Z_0-9-]+]] = zext i8 %a to i64 +; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXTLD1_4]], [[ZEXTLD1_2]] ; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64 -; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_2]] +; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_1]] ; ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32 ; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64 Index: test/CodeGen/X86/codegen-prepare-extload.ll =================================================================== --- test/CodeGen/X86/codegen-prepare-extload.ll +++ test/CodeGen/X86/codegen-prepare-extload.ll @@ -264,8 +264,7 @@ ; => We have one zext of %zextld left and we created one sext of %ld2. ; 2. We try to promote the operand of %sextaddza. ; a. This creates one sext of %zexta and one of %zextld -; b. The sext of %zexta does not lead to any load, it stays here, even if it -; could have been combine with the zext of %a. +; b. The sext of %zexta can be combined with the zext of %a. ; c. The sext of %zextld leads to %ld and can be combined with it. This is ; done by promoting %zextld. This is fine with the current heuristic: ; neutral. @@ -287,16 +286,14 @@ ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %addr1 ; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64 ; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64 -; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64 ; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32, i32* %addr2 ; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64 -; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_1]] -; We do not combine this one: see 2.b. -; OPT-NEXT: [[ZEXTA:%[a-zA-Z_0-9-]+]] = zext i8 %a to i32 -; OPT-NEXT: [[SEXTZEXTA:%[a-zA-Z_0-9-]+]] = sext i32 [[ZEXTA]] to i64 -; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTZEXTA]], [[ZEXTLD1_3]] +; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64 +; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_3]] +; OPT-NEXT: [[ZEXTLD1_4:%[a-zA-Z_0-9-]+]] = zext i8 %a to i64 +; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXTLD1_4]], [[ZEXTLD1_2]] ; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64 -; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_2]] +; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_1]] ; ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32 ; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64