diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -168,7 +168,7 @@ // Is V an instruction thats result can trivially promoted, or has safe // wrapping. bool isLegalToPromote(Value *V); - bool TryToPromote(Value *V, unsigned PromotedWidth); + bool TryToPromote(Value *V, unsigned PromotedWidth, const LoopInfo &LI); public: static char ID; @@ -762,7 +762,8 @@ return false; } -bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { +bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth, + const LoopInfo &LI) { Type *OrigTy = V->getType(); TypeSize = OrigTy->getPrimitiveSizeInBits().getFixedSize(); SafeToPromote.clear(); @@ -856,6 +857,7 @@ unsigned ToPromote = 0; unsigned NonFreeArgs = 0; + unsigned NonLoopSources = 0, LoopSinks = 0; SmallPtrSet Blocks; for (auto *CV : CurrentVisited) { if (auto *I = dyn_cast(CV)) @@ -865,9 +867,16 @@ if (auto *Arg = dyn_cast(CV)) if (!Arg->hasZExtAttr() && !Arg->hasSExtAttr()) ++NonFreeArgs; + if (!isa(CV) || + !LI.getLoopFor(cast(CV)->getParent())) + ++NonLoopSources; continue; } + if (isa(CV)) + continue; + if (LI.getLoopFor(cast(CV)->getParent())) + ++LoopSinks; if (Sinks.count(cast(CV))) continue; ++ToPromote; @@ -875,8 +884,8 @@ // DAG optimizations should be able to handle these cases better, especially // for function arguments. - if (!isa(V) && (ToPromote < 2 || (Blocks.size() == 1 && - (NonFreeArgs > SafeWrap.size())))) + if (!isa(V) && !(LoopSinks && NonLoopSources) && + (ToPromote < 2 || (Blocks.size() == 1 && NonFreeArgs > SafeWrap.size()))) return false; IRPromoter Promoter(*Ctx, PromotedWidth, CurrentVisited, Sources, Sinks, @@ -958,7 +967,7 @@ << "register for ZExt type\n"); continue; } - MadeChange |= TryToPromote(Phi, PromoteWidth); + MadeChange |= TryToPromote(Phi, PromoteWidth, LI); } else if (auto *ICmp = dyn_cast(&I)) { // Search up from icmps to try to promote their operands. // Skip signed or pointer compares @@ -970,7 +979,7 @@ for (auto &Op : ICmp->operands()) { if (auto *OpI = dyn_cast(Op)) { if (auto PromotedWidth = GetPromoteWidth(OpI)) { - MadeChange |= TryToPromote(OpI, PromotedWidth); + MadeChange |= TryToPromote(OpI, PromotedWidth, LI); break; } } diff --git a/llvm/test/CodeGen/AArch64/typepromotion-cost.ll b/llvm/test/CodeGen/AArch64/typepromotion-cost.ll --- a/llvm/test/CodeGen/AArch64/typepromotion-cost.ll +++ b/llvm/test/CodeGen/AArch64/typepromotion-cost.ll @@ -16,7 +16,6 @@ ; CHECK-O2-NEXT: .LBB0_3: ; CHECK-O2-NEXT: mov w9, #1 ; CHECK-O2-NEXT: .LBB0_4: // %lor.end.sink.split -; CHECK-O2-NEXT: and w8, w8, #0xffff ; CHECK-O2-NEXT: cmp w8, w9 ; CHECK-O2-NEXT: cset w0, eq ; CHECK-O2-NEXT: ret @@ -32,13 +31,11 @@ ; CHECK-O3-NEXT: cbz x1, .LBB0_4 ; CHECK-O3-NEXT: // %bb.2: ; CHECK-O3-NEXT: mov w9, #2 -; CHECK-O3-NEXT: and w8, w8, #0xffff ; CHECK-O3-NEXT: cmp w8, w9 ; CHECK-O3-NEXT: cset w0, eq ; CHECK-O3-NEXT: ret ; CHECK-O3-NEXT: .LBB0_3: ; CHECK-O3-NEXT: mov w9, #1 -; CHECK-O3-NEXT: and w8, w8, #0xffff ; CHECK-O3-NEXT: cmp w8, w9 ; CHECK-O3-NEXT: cset w0, eq ; CHECK-O3-NEXT: ret @@ -69,10 +66,11 @@ define i8 @loopcmp(ptr nocapture noundef readonly %x, i8 noundef %y) { ; CHECK-O2-LABEL: loopcmp: ; CHECK-O2: // %bb.0: // %entry +; CHECK-O2-NEXT: and w9, w1, #0xff ; CHECK-O2-NEXT: .LBB1_1: // %while.cond ; CHECK-O2-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-O2-NEXT: ldrb w8, [x0], #1 -; CHECK-O2-NEXT: cmp w8, w1, uxtb +; CHECK-O2-NEXT: cmp w8, w9 ; CHECK-O2-NEXT: b.lo .LBB1_1 ; CHECK-O2-NEXT: // %bb.2: // %while.end ; CHECK-O2-NEXT: mov w0, w8