Index: llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
===================================================================
--- llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1833,20 +1833,40 @@
   return V;
 }
 
-/// Check whether value has nuw/nsw/exact set but SCEV does not.
-/// TODO: In reality it is better to check the poison recursively
-/// but this is better than nothing.
-static bool SCEVLostPoisonFlags(const SCEV *S, const Instruction *I) {
+/// Return true if we can reuse instruction 'I' for all users of 'S' without
+/// introducing uses of 'I' which propagate poison in more cases.
+static bool canReuseInstructionForAllUsers(const SCEV *S,
+                                           const Instruction *I) {
   if (isa<OverflowingBinaryOperator>(I)) {
+    if (!I->hasNoSignedWrap() && !I->hasNoUnsignedWrap())
+      // 'I' never introduces new poison
+      return true;
     if (auto *NS = dyn_cast<SCEVNAryExpr>(S)) {
+      // We have a SCEV with flags we can check, are the instructions
+      // flags strictly weaker than the SCEVs?
       if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap())
-        return true;
+        return false;
       if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap())
-        return true;
+        return false;
+      return true;
     }
+    // In general, we may have used flags to optimize a binop into an
+    // alternate form. Thus, we must conclude that S conceptually has
+    // some unknown set of flags and thus that I must contradict them.
+    return false;
   } else if (isa<PossiblyExactOperator>(I) && I->isExact())
+    return false;
+  else if (const auto *GEP = dyn_cast<GEPOperator>(I)) {
+    if (GEP->isInBounds())
+      // 'inbounds' can introduce poison which S does not model. TODO: SCEV
+      // models as an add in most cases, we could duplicate logic from above.
+      return false;
     return true;
-  return false;
+  }
+
+  // We have enumerated the instructions which have poison generating flags
+  // while also being SCEVable. Thus, if we get here, we can reuse.
+  return true;
 }
 
 ScalarEvolution::ValueOffsetPair
@@ -1873,7 +1893,7 @@
         SE.DT.dominates(EntInst, InsertPt) &&
         (SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
          SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)) &&
-        !SCEVLostPoisonFlags(S, EntInst))
+        canReuseInstructionForAllUsers(S, EntInst))
       return {V, Offset};
     }
   }
Index: llvm/test/CodeGen/PowerPC/common-chain.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/common-chain.ll
+++ llvm/test/CodeGen/PowerPC/common-chain.ll
@@ -786,158 +786,154 @@
 ; CHECK-NEXT: cmpldi r8, 3
 ; CHECK-NEXT: blt cr0, .LBB7_4
 ; CHECK-NEXT: # %bb.2: # %for.body.preheader.new
+; CHECK-NEXT: ld r17, -176(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r18, -168(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r19, -160(r1) # 8-byte Folded Reload
+; CHECK-NEXT: sldi r11, r12, 1
+; CHECK-NEXT: add r0, r12, r11
 ; CHECK-NEXT: rldicl r7, r7, 62, 2
 ; CHECK-NEXT: sldi r10, r12, 2
-; CHECK-NEXT: ld r2, -168(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r31, -160(r1) # 8-byte Folded Reload
-; CHECK-NEXT: std r7, -184(r1) # 8-byte Folded Spill
-; CHECK-NEXT: mr r7, r4
-; CHECK-NEXT: ld r4, -176(r1) # 8-byte Folded Reload
-; CHECK-NEXT: add r8, r4, r10
-; CHECK-NEXT: sldi r8, r8, 3
-; CHECK-NEXT: add r9, r5, r8
-; CHECK-NEXT: add r8, r2, r10
-; CHECK-NEXT: add r10, r31, r10
-; CHECK-NEXT: sldi r10, r10, 3
+; CHECK-NEXT: rldicl r7, r7, 2, 1
+; CHECK-NEXT: add r29, r17, r0
+; CHECK-NEXT: add r28, r18, r0
+; CHECK-NEXT: add r0, r19, r0
+; CHECK-NEXT: addi r7, r7, -4
+; CHECK-NEXT: add r8, r17, r10
+; CHECK-NEXT: sldi r31, r0, 3
+; CHECK-NEXT: add r0, r17, r11
 ; CHECK-NEXT: sldi r8, r8, 3
-; CHECK-NEXT: add r30, r5, r10
-; CHECK-NEXT: add r29, r7, r10
-; CHECK-NEXT: add r28, r3, r10
-; CHECK-NEXT: sldi r10, r12, 1
-; CHECK-NEXT: add r8, r5, r8
-; CHECK-NEXT: add r11, r12, r10
-; CHECK-NEXT: add r0, r4, r11
-; CHECK-NEXT: sldi r0, r0, 3
-; CHECK-NEXT: add r27, r5, r0
-; CHECK-NEXT: add r0, r2, r11
-;
CHECK-NEXT: add r11, r31, r11 -; CHECK-NEXT: sldi r11, r11, 3 +; CHECK-NEXT: rldicl r7, r7, 62, 2 +; CHECK-NEXT: sldi r29, r29, 3 +; CHECK-NEXT: sldi r28, r28, 3 ; CHECK-NEXT: sldi r0, r0, 3 -; CHECK-NEXT: add r25, r5, r11 -; CHECK-NEXT: add r24, r7, r11 -; CHECK-NEXT: add r23, r3, r11 -; CHECK-NEXT: add r11, r4, r10 +; CHECK-NEXT: addi r7, r7, 1 +; CHECK-NEXT: add r9, r5, r8 +; CHECK-NEXT: add r8, r18, r10 +; CHECK-NEXT: add r10, r19, r10 +; CHECK-NEXT: add r29, r5, r29 +; CHECK-NEXT: add r28, r5, r28 +; CHECK-NEXT: add r27, r5, r31 ; CHECK-NEXT: add r26, r5, r0 -; CHECK-NEXT: sldi r11, r11, 3 -; CHECK-NEXT: add r22, r5, r11 -; CHECK-NEXT: add r11, r2, r10 -; CHECK-NEXT: add r10, r31, r10 +; CHECK-NEXT: add r0, r18, r11 +; CHECK-NEXT: add r11, r19, r11 +; CHECK-NEXT: sldi r8, r8, 3 ; CHECK-NEXT: sldi r10, r10, 3 +; CHECK-NEXT: mtctr r7 +; CHECK-NEXT: sldi r7, r12, 5 +; CHECK-NEXT: sldi r0, r0, 3 ; CHECK-NEXT: sldi r11, r11, 3 -; CHECK-NEXT: add r20, r5, r10 -; CHECK-NEXT: add r19, r7, r10 -; CHECK-NEXT: add r18, r3, r10 -; CHECK-NEXT: add r10, r12, r4 -; CHECK-NEXT: add r21, r5, r11 -; CHECK-NEXT: sldi r11, r2, 3 -; CHECK-NEXT: sldi r10, r10, 3 -; CHECK-NEXT: add r17, r5, r10 -; CHECK-NEXT: add r10, r12, r2 -; CHECK-NEXT: sldi r10, r10, 3 -; CHECK-NEXT: add r16, r5, r10 -; CHECK-NEXT: add r10, r12, r31 -; CHECK-NEXT: sldi r31, r31, 3 -; CHECK-NEXT: sub r0, r11, r31 -; CHECK-NEXT: sldi r11, r4, 3 -; CHECK-NEXT: mr r4, r7 -; CHECK-NEXT: ld r7, -184(r1) # 8-byte Folded Reload -; CHECK-NEXT: sldi r10, r10, 3 -; CHECK-NEXT: add r15, r5, r10 -; CHECK-NEXT: add r14, r3, r10 -; CHECK-NEXT: sub r31, r11, r31 +; CHECK-NEXT: add r8, r5, r8 +; CHECK-NEXT: add r30, r5, r10 +; CHECK-NEXT: add r16, r3, r10 ; CHECK-NEXT: add r2, r4, r10 -; CHECK-NEXT: li r11, 0 ; CHECK-NEXT: mr r10, r12 -; CHECK-NEXT: rldicl r7, r7, 2, 1 -; CHECK-NEXT: addi r7, r7, -4 -; CHECK-NEXT: rldicl r7, r7, 62, 2 -; CHECK-NEXT: addi r7, r7, 1 -; CHECK-NEXT: mtctr r7 -; CHECK-NEXT: sldi r7, r12, 5 +; 
CHECK-NEXT: add r25, r5, r0 +; CHECK-NEXT: add r0, r12, r17 +; CHECK-NEXT: add r24, r5, r11 +; CHECK-NEXT: add r14, r4, r11 +; CHECK-NEXT: sldi r0, r0, 3 +; CHECK-NEXT: add r23, r5, r0 +; CHECK-NEXT: add r0, r12, r18 +; CHECK-NEXT: sldi r0, r0, 3 +; CHECK-NEXT: add r22, r5, r0 +; CHECK-NEXT: add r0, r12, r19 +; CHECK-NEXT: sldi r19, r19, 3 +; CHECK-NEXT: sldi r15, r0, 3 +; CHECK-NEXT: sldi r0, r18, 3 +; CHECK-NEXT: sldi r18, r17, 3 +; CHECK-NEXT: add r17, r3, r31 +; CHECK-NEXT: add r31, r4, r31 +; CHECK-NEXT: add r21, r5, r15 +; CHECK-NEXT: add r20, r3, r15 +; CHECK-NEXT: sub r0, r0, r19 +; CHECK-NEXT: sub r19, r18, r19 +; CHECK-NEXT: add r18, r3, r11 +; CHECK-NEXT: add r15, r4, r15 +; CHECK-NEXT: li r11, 0 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB7_3: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: lfd f0, 0(r14) -; CHECK-NEXT: lfd f1, 0(r2) +; CHECK-NEXT: lfd f0, 0(r20) +; CHECK-NEXT: lfd f1, 0(r15) ; CHECK-NEXT: add r10, r10, r12 ; CHECK-NEXT: add r10, r10, r12 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfd f1, 0(r15) +; CHECK-NEXT: lfd f1, 0(r21) ; CHECK-NEXT: add r10, r10, r12 ; CHECK-NEXT: add r10, r10, r12 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfd f0, 0(r15) -; CHECK-NEXT: add r15, r15, r7 -; CHECK-NEXT: lfdx f0, r14, r0 -; CHECK-NEXT: lfdx f1, r2, r0 +; CHECK-NEXT: stfd f0, 0(r21) +; CHECK-NEXT: add r21, r21, r7 +; CHECK-NEXT: lfdx f0, r20, r0 +; CHECK-NEXT: lfdx f1, r15, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r16, r11 +; CHECK-NEXT: lfdx f1, r22, r11 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r16, r11 -; CHECK-NEXT: lfdx f0, r14, r31 -; CHECK-NEXT: lfdx f1, r2, r31 -; CHECK-NEXT: add r14, r14, r7 -; CHECK-NEXT: add r2, r2, r7 +; CHECK-NEXT: stfdx f0, r22, r11 +; CHECK-NEXT: lfdx f0, r20, r19 +; CHECK-NEXT: lfdx f1, r15, r19 +; CHECK-NEXT: add r20, r20, r7 +; CHECK-NEXT: add r15, r15, r7 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r17, r11 +; CHECK-NEXT: lfdx f1, r23, r11 ; CHECK-NEXT: xsadddp 
f0, f1, f0 -; CHECK-NEXT: stfdx f0, r17, r11 +; CHECK-NEXT: stfdx f0, r23, r11 ; CHECK-NEXT: lfd f0, 0(r18) -; CHECK-NEXT: lfd f1, 0(r19) +; CHECK-NEXT: lfd f1, 0(r14) ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r20, r11 +; CHECK-NEXT: lfdx f1, r24, r11 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r20, r11 +; CHECK-NEXT: stfdx f0, r24, r11 ; CHECK-NEXT: lfdx f0, r18, r0 -; CHECK-NEXT: lfdx f1, r19, r0 -; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r21, r11 -; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r21, r11 -; CHECK-NEXT: lfdx f0, r18, r31 -; CHECK-NEXT: lfdx f1, r19, r31 -; CHECK-NEXT: add r18, r18, r7 -; CHECK-NEXT: add r19, r19, r7 -; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r22, r11 -; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r22, r11 -; CHECK-NEXT: lfd f0, 0(r23) -; CHECK-NEXT: lfd f1, 0(r24) +; CHECK-NEXT: lfdx f1, r14, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 ; CHECK-NEXT: lfdx f1, r25, r11 ; CHECK-NEXT: xsadddp f0, f1, f0 ; CHECK-NEXT: stfdx f0, r25, r11 -; CHECK-NEXT: lfdx f0, r23, r0 -; CHECK-NEXT: lfdx f1, r24, r0 +; CHECK-NEXT: lfdx f0, r18, r19 +; CHECK-NEXT: lfdx f1, r14, r19 +; CHECK-NEXT: add r18, r18, r7 +; CHECK-NEXT: add r14, r14, r7 ; CHECK-NEXT: xsmuldp f0, f0, f1 ; CHECK-NEXT: lfdx f1, r26, r11 ; CHECK-NEXT: xsadddp f0, f1, f0 ; CHECK-NEXT: stfdx f0, r26, r11 -; CHECK-NEXT: lfdx f0, r23, r31 -; CHECK-NEXT: lfdx f1, r24, r31 -; CHECK-NEXT: add r23, r23, r7 -; CHECK-NEXT: add r24, r24, r7 +; CHECK-NEXT: lfd f0, 0(r17) +; CHECK-NEXT: lfd f1, 0(r31) ; CHECK-NEXT: xsmuldp f0, f0, f1 ; CHECK-NEXT: lfdx f1, r27, r11 ; CHECK-NEXT: xsadddp f0, f1, f0 ; CHECK-NEXT: stfdx f0, r27, r11 -; CHECK-NEXT: lfd f0, 0(r28) -; CHECK-NEXT: lfd f1, 0(r29) +; CHECK-NEXT: lfdx f0, r17, r0 +; CHECK-NEXT: lfdx f1, r31, r0 +; CHECK-NEXT: xsmuldp f0, f0, f1 +; CHECK-NEXT: lfdx f1, r28, r11 +; CHECK-NEXT: xsadddp f0, f1, f0 +; CHECK-NEXT: stfdx f0, r28, r11 +; CHECK-NEXT: lfdx f0, r17, r19 +; 
CHECK-NEXT: lfdx f1, r31, r19 +; CHECK-NEXT: add r17, r17, r7 +; CHECK-NEXT: add r31, r31, r7 +; CHECK-NEXT: xsmuldp f0, f0, f1 +; CHECK-NEXT: lfdx f1, r29, r11 +; CHECK-NEXT: xsadddp f0, f1, f0 +; CHECK-NEXT: stfdx f0, r29, r11 +; CHECK-NEXT: lfd f0, 0(r16) +; CHECK-NEXT: lfd f1, 0(r2) ; CHECK-NEXT: xsmuldp f0, f0, f1 ; CHECK-NEXT: lfdx f1, r30, r11 ; CHECK-NEXT: xsadddp f0, f1, f0 ; CHECK-NEXT: stfdx f0, r30, r11 -; CHECK-NEXT: lfdx f0, r28, r0 -; CHECK-NEXT: lfdx f1, r29, r0 +; CHECK-NEXT: lfdx f0, r16, r0 +; CHECK-NEXT: lfdx f1, r2, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 ; CHECK-NEXT: lfdx f1, r8, r11 ; CHECK-NEXT: xsadddp f0, f1, f0 ; CHECK-NEXT: stfdx f0, r8, r11 -; CHECK-NEXT: lfdx f0, r28, r31 -; CHECK-NEXT: lfdx f1, r29, r31 -; CHECK-NEXT: add r28, r28, r7 -; CHECK-NEXT: add r29, r29, r7 +; CHECK-NEXT: lfdx f0, r16, r19 +; CHECK-NEXT: lfdx f1, r2, r19 +; CHECK-NEXT: add r16, r16, r7 +; CHECK-NEXT: add r2, r2, r7 ; CHECK-NEXT: xsmuldp f0, f0, f1 ; CHECK-NEXT: lfdx f1, r9, r11 ; CHECK-NEXT: xsadddp f0, f1, f0