Index: llvm/include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- llvm/include/llvm/Analysis/ScalarEvolution.h
+++ llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1304,6 +1304,12 @@
   /// to be infinite, it must also be undefined.
   bool loopIsFiniteByAssumption(const Loop *L);
 
+  /// Return the set of Values that, if poison, will definitively result in S
+  /// being poison as well. The returned set may be incomplete, i.e. there can
+  /// be additional Values that also result in S being poison.
+  void getPoisonGeneratingValues(SmallPtrSetImpl<Value *> &Result,
+                                 const SCEV *S);
+
   class FoldID {
     const SCEV *Op = nullptr;
     const Type *Ty = nullptr;
Index: llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
===================================================================
--- llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -438,7 +438,11 @@
   Value *expandAddToGEP(const SCEV *Op, Type *Ty, Value *V);
 
   /// Find a previous Value in ExprValueMap for expand.
-  Value *FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt);
+  /// DropPoisonGeneratingInsts is populated with instructions for which
+  /// poison-generating flags must be dropped if the value is reused.
+  Value *FindValueInExprValueMap(
+      const SCEV *S, const Instruction *InsertPt,
+      SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts);
 
   Value *expand(const SCEV *S);
Index: llvm/lib/Analysis/ScalarEvolution.cpp
===================================================================
--- llvm/lib/Analysis/ScalarEvolution.cpp
+++ llvm/lib/Analysis/ScalarEvolution.cpp
@@ -4165,6 +4165,14 @@
   });
 }
 
+void ScalarEvolution::getPoisonGeneratingValues(
+    SmallPtrSetImpl<Value *> &Result, const SCEV *S) {
+  SCEVPoisonCollector PC(/* LookThroughMaybePoisonBlocking */ false);
+  visitAll(S, PC);
+  for (const SCEVUnknown *SU : PC.MaybePoison)
+    Result.insert(SU->getValue());
+}
+
 const SCEV *
 ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind,
                                          SmallVectorImpl<const SCEV *> &Ops) {
Index: llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
===================================================================
--- llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1458,8 +1458,64 @@
   return V;
 }
 
-Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
-                                             const Instruction *InsertPt) {
+static bool
+canReuseInstruction(ScalarEvolution &SE, const SCEV *S, Instruction *I,
+                    SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
+  // If the instruction cannot be poison, it's always safe to reuse.
+  if (programUndefinedIfPoison(I))
+    return true;
+
+  // Otherwise, it is possible that I is more poisonous than S. Collect the
+  // poison-contributors of S, and then check whether I has any additional
+  // poison-contributors. Poison that is contributed through poison-generating
+  // flags is handled by dropping those flags instead.
+  SmallPtrSet<Value *, 8> PoisonVals;
+  SE.getPoisonGeneratingValues(PoisonVals, S);
+
+  SmallVector<Value *> Worklist;
+  SmallPtrSet<Value *, 8> Visited;
+  Worklist.push_back(I);
+  while (!Worklist.empty()) {
+    Value *V = Worklist.pop_back_val();
+    if (!Visited.insert(V).second)
+      continue;
+
+    // Avoid walking large instruction graphs.
+    if (Visited.size() > 16)
+      return false;
+
+    // Either the value can't be poison, or the S would also be poison if it
+    // is.
+    if (PoisonVals.contains(V) || isGuaranteedNotToBePoison(V))
+      continue;
+
+    auto *I = dyn_cast<Instruction>(V);
+    if (!I)
+      return false;
+
+    // FIXME: Ignore vscale, even though it technically could be poison. Do this
+    // because SCEV currently assumes it can't be poison. Remove this special
+    // case once we properly model when vscale can be poison.
+    if (auto *II = dyn_cast<IntrinsicInst>(I);
+        II && II->getIntrinsicID() == Intrinsic::vscale)
+      continue;
+
+    if (canCreatePoison(cast<Operator>(I), /*ConsiderFlagsAndMetadata*/ false))
+      return false;
+
+    // If the instruction can't create poison, we can recurse to its operands.
+    if (I->hasPoisonGeneratingFlagsOrMetadata())
+      DropPoisonGeneratingInsts.push_back(I);
+
+    for (Value *Op : I->operands())
+      Worklist.push_back(Op);
+  }
+  return true;
+}
+
+Value *SCEVExpander::FindValueInExprValueMap(
+    const SCEV *S, const Instruction *InsertPt,
+    SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
   // If the expansion is not in CanonicalMode, and the SCEV contains any
   // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
   if (!CanonicalMode && SE.containsAddRecurrence(S))
@@ -1483,7 +1539,10 @@
          SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
       continue;
 
-    return V;
+    // Make sure reusing the instruction is poison-safe.
+    if (canReuseInstruction(SE, S, EntInst, DropPoisonGeneratingInsts))
+      return V;
+    DropPoisonGeneratingInsts.clear();
   }
   return nullptr;
 }
@@ -1554,18 +1613,14 @@
   Builder.SetInsertPoint(InsertPt);
 
   // Expand the expression into instructions.
-  Value *V = FindValueInExprValueMap(S, InsertPt);
+  SmallVector<Instruction *> DropPoisonGeneratingInsts;
+  Value *V = FindValueInExprValueMap(S, InsertPt, DropPoisonGeneratingInsts);
   if (!V) {
     V = visit(S);
     V = fixupLCSSAFormFor(V);
   } else {
-    // If we're reusing an existing instruction, we are effectively CSEing two
-    // copies of the instruction (with potentially different flags). As such,
-    // we need to drop any poison generating flags unless we can prove that
-    // said flags must be valid for all new users.
-    if (auto *I = dyn_cast<Instruction>(V))
-      if (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I))
-        I->dropPoisonGeneratingFlags();
+    for (Instruction *I : DropPoisonGeneratingInsts)
+      I->dropPoisonGeneratingFlagsAndMetadata();
   }
   // Remember the expanded value for this SCEV at this location.
   //
@@ -1773,7 +1828,8 @@
   // ExprValueMap. Note that we don't currently model the cost of
   // needing to drop poison generating flags on the instruction if we
   // want to reuse it. We effectively assume that has zero cost.
-  return FindValueInExprValueMap(S, At) != nullptr;
+  SmallVector<Instruction *> DropPoisonGeneratingInsts;
+  return FindValueInExprValueMap(S, At, DropPoisonGeneratingInsts) != nullptr;
 }
 
 template <typename T> static InstructionCost costAndCollectOperands(
Index: llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
+++ llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
@@ -366,16 +366,18 @@
 ; CHECK-NEXT:    str r0, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    movs r1, #4
 ; CHECK-NEXT:    strd r2, r12, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT:    add.w r1, r1, r4, lsr #1
 ; CHECK-NEXT:    add.w r3, r3, r4, lsr #1
-; CHECK-NEXT:    bic r7, r1, #3
+; CHECK-NEXT:    add.w r1, r1, r4, lsr #1
+; CHECK-NEXT:    movw r4, #65532
+; CHECK-NEXT:    vdup.32 q6, r3
+; CHECK-NEXT:    movt r4, #32767
+; CHECK-NEXT:    and.w r7, r1, r4
 ; CHECK-NEXT:    adr r1, .LCPI1_0
+; CHECK-NEXT:    vdup.32 q7, r3
 ; CHECK-NEXT:    vldrw.u32 q0, [r1]
 ; CHECK-NEXT:    adr r1, .LCPI1_1
 ; CHECK-NEXT:    vldrw.u32 q5, [r1]
-; CHECK-NEXT:    vdup.32 q6, r3
 ; CHECK-NEXT:    vadd.i32 q4, q0, lr
-; CHECK-NEXT:    vdup.32 q7, r3
 ; CHECK-NEXT:    b .LBB1_4
 ; CHECK-NEXT:  .LBB1_2: @ %for.body6.preheader
 ; CHECK-NEXT:    @ in Loop: Header=BB1_4 Depth=1
Index: llvm/test/CodeGen/X86/AMX/amx-greedy-ra-spill-shape.ll
===================================================================
--- llvm/test/CodeGen/X86/AMX/amx-greedy-ra-spill-shape.ll
+++ llvm/test/CodeGen/X86/AMX/amx-greedy-ra-spill-shape.ll
@@ -47,7 +47,7 @@
   ; CHECK-NEXT:   MOV16mr %stack.0, 1, $noreg, 20, $noreg, [[LEA64_32r]].sub_16bit :: (store (s512) into %stack.0 + 20, align 4)
   ; CHECK-NEXT:   PLDTILECFGV %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.0, align 4)
   ; CHECK-NEXT:   [[MOVSX64rr32_:%[0-9]+]]:gr64_nosp = MOVSX64rr32 [[COPY83]].sub_32bit
-  ; CHECK-NEXT:   [[COPY83]].sub_32bit:gr64_with_sub_8bit = nsw SUB32rr [[COPY83]].sub_32bit, [[SUB32rr]], implicit-def dead $eflags
+  ; CHECK-NEXT:   [[COPY83]].sub_32bit:gr64_with_sub_8bit = SUB32rr [[COPY83]].sub_32bit, [[SUB32rr]], implicit-def dead $eflags
   ; CHECK-NEXT:   undef %14.sub_32bit:gr64_with_sub_8bit = MOVZX32rr16 [[COPY83]].sub_16bit
   ; CHECK-NEXT:   ADD64mr %stack.1, 1, $noreg, 0, $noreg, %14, implicit-def dead $eflags :: (store (s64) into %stack.1)
   ; CHECK-NEXT:   undef %61.sub_32bit:gr64_with_sub_8bit = COPY %14.sub_32bit
Index: llvm/test/Transforms/IndVarSimplify/pr63763.ll
===================================================================
--- llvm/test/Transforms/IndVarSimplify/pr63763.ll
+++ llvm/test/Transforms/IndVarSimplify/pr63763.ll
@@ -1,7 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
 ; RUN: opt -S -passes='print<scalar-evolution>,indvars' < %s 2>/dev/null | FileCheck %s
 
-; FIXME: This is a miscompile.
 ; We should use %invariant.op.us rather than %invariant.op for the exit
 ; value expansion. They have the same SCEV, but %invariant.op is more
 ; poisonous.
@@ -15,13 +14,16 @@ ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[SHL_I]], [[SEL]] ; CHECK-NEXT: [[SEXT:%.*]] = shl i32 [[ADD]], 24 ; CHECK-NEXT: [[CONV2:%.*]] = ashr exact i32 [[SEXT]], 24 -; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub i32 7, [[CONV2]] +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 7, [[CONV2]] ; CHECK-NEXT: call void @use(i32 [[INVARIANT_OP]]) +; CHECK-NEXT: [[SEXT_US:%.*]] = shl i32 [[SEL]], 24 +; CHECK-NEXT: [[CONV2_US:%.*]] = ashr exact i32 [[SEXT_US]], 24 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: ret i32 [[INVARIANT_OP]] +; CHECK-NEXT: [[INVARIANT_OP_US:%.*]] = sub nsw i32 7, [[CONV2_US]] +; CHECK-NEXT: ret i32 [[INVARIANT_OP_US]] ; entry: %sel = select i1 %c, i32 33, i32 0 Index: llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll +++ llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll @@ -96,7 +96,7 @@ ; CHECK: inner.2.preheader: ; CHECK-NEXT: br label [[INNER_2]] ; CHECK: inner.2: -; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1 +; CHECK-NEXT: [[OUTER_IV_NEXT]] = add i64 [[OUTER_IV]], 1 ; CHECK-NEXT: br label [[OUTER_HEADER]] ; CHECK: exit: ; CHECK-NEXT: ret void