diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -37,9 +37,9 @@ using namespace llvm; cl::opt llvm::SCEVCheapExpansionBudget( - "scev-cheap-expansion-budget", cl::Hidden, cl::init(4), + "scev-cheap-expansion-budget", cl::Hidden, cl::init(8), cl::desc("When performing SCEV expansion only if it is cheap to do, this " - "controls the budget that is considered cheap (default = 4)")); + "controls the budget that is considered cheap (default = 8)")); using namespace PatternMatch; diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll --- a/llvm/test/CodeGen/PowerPC/common-chain.ll +++ b/llvm/test/CodeGen/PowerPC/common-chain.ll @@ -771,235 +771,239 @@ ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r31, -8(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r2, -152(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r9, -160(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r8, -176(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r7, -168(r1) # 8-byte Folded Spill -; CHECK-NEXT: ble cr0, .LBB7_7 +; CHECK-NEXT: ble cr0, .LBB7_8 ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: sldi r6, r6, 2 -; CHECK-NEXT: li r7, 1 -; CHECK-NEXT: mr r12, r10 +; CHECK-NEXT: mr r0, r8 +; CHECK-NEXT: li r8, 1 +; CHECK-NEXT: mr r27, r7 +; CHECK-NEXT: mr r7, r10 ; CHECK-NEXT: cmpdi r6, 1 -; CHECK-NEXT: iselgt r7, r6, r7 -; CHECK-NEXT: addi r8, r7, -1 -; CHECK-NEXT: clrldi r6, r7, 63 -; CHECK-NEXT: cmpldi r8, 3 -; CHECK-NEXT: blt cr0, .LBB7_4 +; CHECK-NEXT: iselgt r8, r6, r8 +; CHECK-NEXT: addi r11, r8, -1 +; CHECK-NEXT: clrldi r6, r8, 63 +; CHECK-NEXT: cmpldi r11, 3 +; CHECK-NEXT: blt cr0, .LBB7_5 ; CHECK-NEXT: # %bb.2: # %for.body.preheader.new -; CHECK-NEXT: rldicl r7, r7, 62, 2 -; CHECK-NEXT: sldi r10, r12, 2 -; CHECK-NEXT: ld r2, -168(r1) # 8-byte Folded Reload -; CHECK-NEXT: rldicl r7, r7, 2, 1 -; CHECK-NEXT: std r7, -184(r1) # 8-byte Folded Spill -; CHECK-NEXT: ld r7, -160(r1) # 8-byte Folded Reload -; CHECK-NEXT: add r8, r7, r10 -; CHECK-NEXT: mr r22, r7 -; CHECK-NEXT: mr r7, r4 -; CHECK-NEXT: mr r4, r3 -; CHECK-NEXT: ld r3, -176(r1) # 8-byte Folded Reload +; CHECK-NEXT: rldicl r8, r8, 62, 2 +; CHECK-NEXT: sldi r11, r10, 2 +; CHECK-NEXT: mr r16, r9 +; CHECK-NEXT: std r9, -160(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r3, -184(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r5, -208(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r4, -192(r1) # 8-byte Folded Spill +; CHECK-NEXT: sldi r12, r10, 5 +; CHECK-NEXT: rldicl r7, r8, 2, 1 +; CHECK-NEXT: add r8, r9, r11 +; CHECK-NEXT: mr r9, r27 +; CHECK-NEXT: std r7, -200(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r7, r0 +; CHECK-NEXT: add r0, r0, r11 +; CHECK-NEXT: add r11, r27, r11 +; CHECK-NEXT: std r9, -168(r1) # 8-byte Folded Spill ; CHECK-NEXT: sldi r8, r8, 3 -; CHECK-NEXT: add r9, r5, r8 -; CHECK-NEXT: add r8, r3, r10 -; CHECK-NEXT: add r10, r2, r10 -; CHECK-NEXT: sldi r10, r10, 3 -; CHECK-NEXT: sldi r8, r8, 3 -; CHECK-NEXT: add r30, r5, r10 -; CHECK-NEXT: add r29, r7, r10 -; CHECK-NEXT: add r28, r4, r10 -; CHECK-NEXT: sldi r10, r12, 1 ; CHECK-NEXT: add r8, r5, r8 -; CHECK-NEXT: add r11, r12, r10 -; CHECK-NEXT: add r0, r22, r11 +; CHECK-NEXT: sldi r11, r11, 3 +; CHECK-NEXT: add r29, r5, r11 ; CHECK-NEXT: sldi r0, r0, 3 -; CHECK-NEXT: add r27, r5, r0 -; CHECK-NEXT: add r0, r3, r11 -; CHECK-NEXT: add r11, r2, r11 +; CHECK-NEXT: add r28, r4, r11 +; CHECK-NEXT: add r27, r3, r11 +; CHECK-NEXT: sldi r11, r10, 1 +; CHECK-NEXT: add r30, r5, r0 +; CHECK-NEXT: std r7, -176(r1) # 8-byte Folded Spill +; CHECK-NEXT: add r0, r10, r11 +; CHECK-NEXT: add r26, r16, r0 +; CHECK-NEXT: add r25, r7, r0 +; CHECK-NEXT: add r0, r9, r0 +; CHECK-NEXT: sldi r0, r0, 3 +; CHECK-NEXT: sldi r26, r26, 3 +; CHECK-NEXT: sldi r25, r25, 3 +; CHECK-NEXT: add r24, r5, r0 +; CHECK-NEXT: add r23, r4, r0 +; CHECK-NEXT: add r22, r3, r0 +; CHECK-NEXT: add r0, r16, r11 +; CHECK-NEXT: add r26, r5, r26 +; CHECK-NEXT: add r25, r5, r25 +; CHECK-NEXT: sldi r0, r0, 3 +; CHECK-NEXT: add r21, r5, r0 +; CHECK-NEXT: add r0, r7, r11 +; CHECK-NEXT: add r11, r9, r11 ; CHECK-NEXT: sldi r11, r11, 3 ; CHECK-NEXT: sldi r0, r0, 3 -; CHECK-NEXT: add r25, r5, r11 -; CHECK-NEXT: add r24, r7, r11 -; CHECK-NEXT: add r23, r4, r11 -; CHECK-NEXT: add r11, r22, r10 -; CHECK-NEXT: add r26, r5, r0 -; CHECK-NEXT: mr r0, r22 +; CHECK-NEXT: add r19, r5, r11 +; CHECK-NEXT: add r18, r4, r11 +; CHECK-NEXT: add r17, r3, r11 +; CHECK-NEXT: add r11, r10, r16 +; CHECK-NEXT: add r20, r5, r0 +; CHECK-NEXT: sldi r0, r7, 3 ; CHECK-NEXT: sldi r11, r11, 3 -; CHECK-NEXT: add r22, r5, r11 -; CHECK-NEXT: add r11, r3, r10 -; CHECK-NEXT: add r10, r2, r10 -; CHECK-NEXT: sldi r10, r10, 3 +; CHECK-NEXT: add r16, r5, r11 +; CHECK-NEXT: add r11, r10, r7 ; CHECK-NEXT: sldi r11, r11, 3 -; CHECK-NEXT: add r20, r5, r10 -; CHECK-NEXT: add r19, r7, r10 -; CHECK-NEXT: add r18, r4, r10 -; CHECK-NEXT: add r10, r12, r0 -; CHECK-NEXT: add r21, r5, r11 -; CHECK-NEXT: sldi r11, r2, 3 -; CHECK-NEXT: sldi r10, r10, 3 -; CHECK-NEXT: add r17, r5, r10 -; CHECK-NEXT: add r10, r12, r3 -; CHECK-NEXT: sldi r10, r10, 3 -; CHECK-NEXT: add r16, r5, r10 -; CHECK-NEXT: add r10, r12, r2 -; CHECK-NEXT: sldi r10, r10, 3 -; CHECK-NEXT: add r15, r5, r10 -; CHECK-NEXT: add r14, r7, r10 -; CHECK-NEXT: add r31, r4, r10 -; CHECK-NEXT: sldi r10, r3, 3 -; CHECK-NEXT: mr r3, r4 -; CHECK-NEXT: mr r4, r7 -; CHECK-NEXT: ld r7, -160(r1) # 8-byte Folded Reload -; CHECK-NEXT: sub r0, r10, r11 -; CHECK-NEXT: sldi r10, r7, 3 -; CHECK-NEXT: ld r7, -184(r1) # 8-byte Folded Reload -; CHECK-NEXT: sub r2, r10, r11 -; CHECK-NEXT: li r11, 0 -; CHECK-NEXT: mr r10, r12 -; CHECK-NEXT: addi r7, r7, -4 +; CHECK-NEXT: add r15, r5, r11 +; CHECK-NEXT: add r11, r10, r9 +; CHECK-NEXT: sldi r11, r11, 3 +; CHECK-NEXT: add r2, r3, r11 +; CHECK-NEXT: ld r3, -160(r1) # 8-byte Folded Reload +; CHECK-NEXT: add r14, r5, r11 +; CHECK-NEXT: add r31, r4, r11 +; CHECK-NEXT: sldi r11, r9, 3 +; CHECK-NEXT: sub r0, r0, r11 +; CHECK-NEXT: sldi r7, r3, 3 +; CHECK-NEXT: ld r3, -200(r1) # 8-byte Folded Reload +; CHECK-NEXT: sub r11, r7, r11 +; CHECK-NEXT: addi r7, r3, -4 +; CHECK-NEXT: ori r3, r3, 1 ; CHECK-NEXT: rldicl r7, r7, 62, 2 +; CHECK-NEXT: mulld r3, r10, r3 ; CHECK-NEXT: addi r7, r7, 1 ; CHECK-NEXT: mtctr r7 -; CHECK-NEXT: sldi r7, r12, 5 +; CHECK-NEXT: li r7, 0 +; CHECK-NEXT: std r10, -200(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r3, -216(r1) # 8-byte Folded Spill ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB7_3: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: lfd f0, 0(r31) +; CHECK-NEXT: lfd f0, 0(r2) +; CHECK-NEXT: lfd f1, 0(r31) +; CHECK-NEXT: xsmuldp f0, f0, f1 ; CHECK-NEXT: lfd f1, 0(r14) -; CHECK-NEXT: add r10, r10, r12 -; CHECK-NEXT: add r10, r10, r12 -; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfd f1, 0(r15) -; CHECK-NEXT: add r10, r10, r12 -; CHECK-NEXT: add r10, r10, r12 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfd f0, 0(r15) -; CHECK-NEXT: add r15, r15, r7 -; CHECK-NEXT: lfdx f0, r31, r0 -; CHECK-NEXT: lfdx f1, r14, r0 +; CHECK-NEXT: stfd f0, 0(r14) +; CHECK-NEXT: add r14, r14, r12 +; CHECK-NEXT: lfdx f0, r2, r0 +; CHECK-NEXT: lfdx f1, r31, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r16, r11 +; CHECK-NEXT: lfdx f1, r15, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r16, r11 -; CHECK-NEXT: lfdx f0, r31, r2 -; CHECK-NEXT: lfdx f1, r14, r2 -; CHECK-NEXT: add r31, r31, r7 -; CHECK-NEXT: add r14, r14, r7 +; CHECK-NEXT: stfdx f0, r15, r7 +; CHECK-NEXT: lfdx f0, r2, r11 +; CHECK-NEXT: lfdx f1, r31, r11 +; CHECK-NEXT: add r2, r2, r12 +; CHECK-NEXT: add r31, r31, r12 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r17, r11 +; CHECK-NEXT: lfdx f1, r16, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r17, r11 -; CHECK-NEXT: lfd f0, 0(r18) -; CHECK-NEXT: lfd f1, 0(r19) +; CHECK-NEXT: stfdx f0, r16, r7 +; CHECK-NEXT: lfd f0, 0(r17) +; CHECK-NEXT: lfd f1, 0(r18) ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r20, r11 +; CHECK-NEXT: lfdx f1, r19, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r20, r11 -; CHECK-NEXT: lfdx f0, r18, r0 -; CHECK-NEXT: lfdx f1, r19, r0 +; CHECK-NEXT: stfdx f0, r19, r7 +; CHECK-NEXT: lfdx f0, r17, r0 +; CHECK-NEXT: lfdx f1, r18, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r21, r11 +; CHECK-NEXT: lfdx f1, r20, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r21, r11 -; CHECK-NEXT: lfdx f0, r18, r2 -; CHECK-NEXT: lfdx f1, r19, r2 -; CHECK-NEXT: add r18, r18, r7 -; CHECK-NEXT: add r19, r19, r7 +; CHECK-NEXT: stfdx f0, r20, r7 +; CHECK-NEXT: lfdx f0, r17, r11 +; CHECK-NEXT: lfdx f1, r18, r11 +; CHECK-NEXT: add r17, r17, r12 +; CHECK-NEXT: add r18, r18, r12 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r22, r11 +; CHECK-NEXT: lfdx f1, r21, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r22, r11 -; CHECK-NEXT: lfd f0, 0(r23) -; CHECK-NEXT: lfd f1, 0(r24) +; CHECK-NEXT: stfdx f0, r21, r7 +; CHECK-NEXT: lfd f0, 0(r22) +; CHECK-NEXT: lfd f1, 0(r23) ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r25, r11 +; CHECK-NEXT: lfdx f1, r24, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r25, r11 -; CHECK-NEXT: lfdx f0, r23, r0 -; CHECK-NEXT: lfdx f1, r24, r0 +; CHECK-NEXT: stfdx f0, r24, r7 +; CHECK-NEXT: lfdx f0, r22, r0 +; CHECK-NEXT: lfdx f1, r23, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r26, r11 +; CHECK-NEXT: lfdx f1, r25, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r26, r11 -; CHECK-NEXT: lfdx f0, r23, r2 -; CHECK-NEXT: lfdx f1, r24, r2 -; CHECK-NEXT: add r23, r23, r7 -; CHECK-NEXT: add r24, r24, r7 +; CHECK-NEXT: stfdx f0, r25, r7 +; CHECK-NEXT: lfdx f0, r22, r11 +; CHECK-NEXT: lfdx f1, r23, r11 +; CHECK-NEXT: add r22, r22, r12 +; CHECK-NEXT: add r23, r23, r12 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r27, r11 +; CHECK-NEXT: lfdx f1, r26, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r27, r11 -; CHECK-NEXT: lfd f0, 0(r28) -; CHECK-NEXT: lfd f1, 0(r29) +; CHECK-NEXT: stfdx f0, r26, r7 +; CHECK-NEXT: lfd f0, 0(r27) +; CHECK-NEXT: lfd f1, 0(r28) ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r30, r11 +; CHECK-NEXT: lfdx f1, r29, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r30, r11 -; CHECK-NEXT: lfdx f0, r28, r0 -; CHECK-NEXT: lfdx f1, r29, r0 +; CHECK-NEXT: stfdx f0, r29, r7 +; CHECK-NEXT: lfdx f0, r27, r0 +; CHECK-NEXT: lfdx f1, r28, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r8, r11 +; CHECK-NEXT: lfdx f1, r30, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r8, r11 -; CHECK-NEXT: lfdx f0, r28, r2 -; CHECK-NEXT: lfdx f1, r29, r2 -; CHECK-NEXT: add r28, r28, r7 -; CHECK-NEXT: add r29, r29, r7 +; CHECK-NEXT: stfdx f0, r30, r7 +; CHECK-NEXT: lfdx f0, r27, r11 +; CHECK-NEXT: lfdx f1, r28, r11 +; CHECK-NEXT: add r27, r27, r12 +; CHECK-NEXT: add r28, r28, r12 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r9, r11 +; CHECK-NEXT: lfdx f1, r8, r7 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r9, r11 -; CHECK-NEXT: add r11, r11, r7 +; CHECK-NEXT: stfdx f0, r8, r7 +; CHECK-NEXT: add r7, r7, r12 ; CHECK-NEXT: bdnz .LBB7_3 -; CHECK-NEXT: .LBB7_4: # %for.cond.cleanup.loopexit.unr-lcssa +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: ld r27, -168(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r0, -176(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r9, -160(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r3, -184(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r4, -192(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r10, -200(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r5, -208(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r7, -216(r1) # 8-byte Folded Reload +; CHECK-NEXT: .LBB7_5: # %for.cond.cleanup.loopexit.unr-lcssa ; CHECK-NEXT: cmpldi r6, 0 -; CHECK-NEXT: beq cr0, .LBB7_7 -; CHECK-NEXT: # %bb.5: # %for.body.epil.preheader -; CHECK-NEXT: sldi r8, r12, 3 -; CHECK-NEXT: ld r12, -176(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r7, -160(r1) # 8-byte Folded Reload -; CHECK-NEXT: add r12, r10, r12 -; CHECK-NEXT: add r7, r10, r7 -; CHECK-NEXT: sldi r0, r12, 3 -; CHECK-NEXT: sldi r11, r7, 3 -; CHECK-NEXT: add r12, r5, r0 -; CHECK-NEXT: add r30, r4, r0 -; CHECK-NEXT: add r29, r3, r0 -; CHECK-NEXT: ld r0, -168(r1) # 8-byte Folded Reload -; CHECK-NEXT: add r7, r5, r11 +; CHECK-NEXT: beq cr0, .LBB7_8 +; CHECK-NEXT: # %bb.6: # %for.body.epil.preheader +; CHECK-NEXT: add r9, r7, r9 +; CHECK-NEXT: sldi r28, r10, 3 +; CHECK-NEXT: sldi r11, r9, 3 +; CHECK-NEXT: add r29, r5, r11 ; CHECK-NEXT: add r9, r4, r11 -; CHECK-NEXT: add r11, r3, r11 -; CHECK-NEXT: add r10, r10, r0 -; CHECK-NEXT: sldi r10, r10, 3 -; CHECK-NEXT: add r5, r5, r10 -; CHECK-NEXT: add r4, r4, r10 -; CHECK-NEXT: add r3, r3, r10 -; CHECK-NEXT: li r10, 0 +; CHECK-NEXT: add r10, r3, r11 +; CHECK-NEXT: add r11, r7, r0 +; CHECK-NEXT: sldi r0, r11, 3 +; CHECK-NEXT: add r11, r5, r0 +; CHECK-NEXT: add r12, r4, r0 +; CHECK-NEXT: add r30, r3, r0 +; CHECK-NEXT: add r0, r7, r27 +; CHECK-NEXT: sldi r0, r0, 3 +; CHECK-NEXT: add r5, r5, r0 +; CHECK-NEXT: add r4, r4, r0 +; CHECK-NEXT: add r3, r3, r0 +; CHECK-NEXT: li r0, 0 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB7_6: # %for.body.epil +; CHECK-NEXT: .LBB7_7: # %for.body.epil ; CHECK-NEXT: # -; CHECK-NEXT: lfdx f0, r3, r10 -; CHECK-NEXT: lfdx f1, r4, r10 +; CHECK-NEXT: lfdx f0, r3, r0 +; CHECK-NEXT: lfdx f1, r4, r0 ; CHECK-NEXT: addi r6, r6, -1 ; CHECK-NEXT: cmpldi r6, 0 ; CHECK-NEXT: xsmuldp f0, f0, f1 ; CHECK-NEXT: lfd f1, 0(r5) ; CHECK-NEXT: xsadddp f0, f1, f0 ; CHECK-NEXT: stfd f0, 0(r5) -; CHECK-NEXT: add r5, r5, r8 -; CHECK-NEXT: lfdx f0, r29, r10 -; CHECK-NEXT: lfdx f1, r30, r10 +; CHECK-NEXT: add r5, r5, r28 +; CHECK-NEXT: lfdx f0, r30, r0 +; CHECK-NEXT: lfdx f1, r12, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r12, r10 +; CHECK-NEXT: lfdx f1, r11, r0 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r12, r10 -; CHECK-NEXT: lfdx f0, r11, r10 -; CHECK-NEXT: lfdx f1, r9, r10 +; CHECK-NEXT: stfdx f0, r11, r0 +; CHECK-NEXT: lfdx f0, r10, r0 +; CHECK-NEXT: lfdx f1, r9, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r7, r10 +; CHECK-NEXT: lfdx f1, r29, r0 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r7, r10 -; CHECK-NEXT: add r10, r10, r8 -; CHECK-NEXT: bne cr0, .LBB7_6 -; CHECK-NEXT: .LBB7_7: # %for.cond.cleanup +; CHECK-NEXT: stfdx f0, r29, r0 +; CHECK-NEXT: add r0, r0, r28 +; CHECK-NEXT: bne cr0, .LBB7_7 +; CHECK-NEXT: .LBB7_8: # %for.cond.cleanup ; CHECK-NEXT: ld r2, -152(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r31, -8(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll --- a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll +++ b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll @@ -26,32 +26,28 @@ ; CHECK-NEXT: li 7, -1 ; CHECK-NEXT: mtctr 3 ; CHECK-NEXT: lbz 5, 0(5) -; CHECK-NEXT: li 3, 1 ; CHECK-NEXT: bdz .LBB0_6 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: xori 6, 5, 84 ; CHECK-NEXT: clrldi 5, 7, 32 -; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: addi 8, 7, -1 ; CHECK-NEXT: lbz 5, 0(5) ; CHECK-NEXT: bdz .LBB0_5 ; CHECK-NEXT: # %bb.2: ; CHECK-NEXT: cntlzw 6, 6 -; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: srwi 7, 6, 5 ; CHECK-NEXT: xori 6, 5, 84 ; CHECK-NEXT: clrldi 5, 8, 32 ; CHECK-NEXT: addi 8, 8, -1 ; CHECK-NEXT: lbz 5, 0(5) ; CHECK-NEXT: bdz .LBB0_4 -; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_3: ; CHECK-NEXT: clrldi 10, 8, 32 ; CHECK-NEXT: cntlzw 9, 6 ; CHECK-NEXT: xori 6, 5, 84 ; CHECK-NEXT: addi 8, 8, -1 ; CHECK-NEXT: lbz 5, 0(10) -; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: add 4, 4, 7 ; CHECK-NEXT: srwi 7, 9, 5 ; CHECK-NEXT: bdnz .LBB0_3 diff --git a/llvm/test/Transforms/IndVarSimplify/ARM/indvar-cost.ll b/llvm/test/Transforms/IndVarSimplify/ARM/indvar-cost.ll --- a/llvm/test/Transforms/IndVarSimplify/ARM/indvar-cost.ll +++ b/llvm/test/Transforms/IndVarSimplify/ARM/indvar-cost.ll @@ -3,7 +3,7 @@ ; RUN: opt -passes=indvars -mtriple=thumbv8m.main -S %s -o - | FileCheck %s --check-prefix=CHECK-T2 define dso_local arm_aapcscc void @arm_conv_fast_q15(i16* %pSrcA, i32 %srcALen, i16* %pSrcB, i32 %srcBLen, i16* %pDst, i16** %store.px, i16** %store.py, i32* %store.res) local_unnamed_addr { -; CHECK-T1-LABEL: @arm_conv_fast_q15( +; CHECK-T1-LABEL: define {{[^@]+}}@arm_conv_fast_q15( ; CHECK-T1-NEXT: entry: ; CHECK-T1-NEXT: [[CMP:%.*]] = icmp ult i32 [[SRCALEN:%.*]], [[SRCBLEN:%.*]] ; CHECK-T1-NEXT: [[SRCALEN_SRCBLEN:%.*]] = select i1 [[CMP]], i32 [[SRCALEN]], i32 [[SRCBLEN]] @@ -13,6 +13,8 @@ ; CHECK-T1-NEXT: [[CMP41080:%.*]] = icmp eq i32 [[SUB]], 0 ; CHECK-T1-NEXT: br i1 [[CMP41080]], label [[WHILE_END13:%.*]], label [[WHILE_COND5_PREHEADER_PREHEADER:%.*]] ; CHECK-T1: while.cond5.preheader.preheader: +; CHECK-T1-NEXT: [[TMP0:%.*]] = add i32 [[SRCALEN_SRCBLEN]], -2 +; CHECK-T1-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 2) ; CHECK-T1-NEXT: br label [[WHILE_COND5_PREHEADER:%.*]] ; CHECK-T1: while.cond5.preheader: ; CHECK-T1-NEXT: [[COUNT_01084:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_END:%.*]] ], [ 1, [[WHILE_COND5_PREHEADER_PREHEADER]] ] @@ -26,11 +28,11 @@ ; CHECK-T1-NEXT: [[PY_11076:%.*]] = phi i16* [ [[INCDEC_PTR8:%.*]], [[WHILE_BODY7]] ], [ [[PY_01082]], [[WHILE_COND5_PREHEADER]] ] ; CHECK-T1-NEXT: [[PX_11075:%.*]] = phi i16* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY7]] ], [ [[PSRCB_PSRCA]], [[WHILE_COND5_PREHEADER]] ] ; CHECK-T1-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i16, i16* [[PX_11075]], i32 1 -; CHECK-T1-NEXT: [[TMP0:%.*]] = load i16, i16* [[PX_11075]], align 2 -; CHECK-T1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +; CHECK-T1-NEXT: [[TMP1:%.*]] = load i16, i16* [[PX_11075]], align 2 +; CHECK-T1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 ; CHECK-T1-NEXT: [[INCDEC_PTR8]] = getelementptr inbounds i16, i16* [[PY_11076]], i32 -1 -; CHECK-T1-NEXT: [[TMP1:%.*]] = load i16, i16* [[PY_11076]], align 2 -; CHECK-T1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32 +; CHECK-T1-NEXT: [[TMP2:%.*]] = load i16, i16* [[PY_11076]], align 2 +; CHECK-T1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-T1-NEXT: [[MUL_I:%.*]] = mul nsw i32 [[CONV9]], [[CONV]] ; CHECK-T1-NEXT: [[SHR3_I:%.*]] = ashr i32 [[CONV]], 16 ; CHECK-T1-NEXT: [[SHR4_I:%.*]] = ashr i32 [[CONV9]], 16 @@ -42,8 +44,8 @@ ; CHECK-T1-NEXT: br i1 [[CMP6]], label [[WHILE_END]], label [[WHILE_BODY7]] ; CHECK-T1: while.end: ; CHECK-T1-NEXT: [[ADD6_I_LCSSA:%.*]] = phi i32 [ [[ADD6_I]], [[WHILE_BODY7]] ] -; CHECK-T1-NEXT: [[TMP2:%.*]] = lshr i32 [[ADD6_I_LCSSA]], 15 -; CHECK-T1-NEXT: [[CONV10:%.*]] = trunc i32 [[TMP2]] to i16 +; CHECK-T1-NEXT: [[TMP3:%.*]] = lshr i32 [[ADD6_I_LCSSA]], 15 +; CHECK-T1-NEXT: [[CONV10:%.*]] = trunc i32 [[TMP3]] to i16 ; CHECK-T1-NEXT: [[INCDEC_PTR11]] = getelementptr inbounds i16, i16* [[POUT_01081]], i32 1 ; CHECK-T1-NEXT: store i16 [[CONV10]], i16* [[POUT_01081]], align 2 ; CHECK-T1-NEXT: [[ADD_PTR]] = getelementptr inbounds i16, i16* [[PSRCA_PSRCB]], i32 [[COUNT_01084]] @@ -51,18 +53,18 @@ ; CHECK-T1-NEXT: [[DEC12]] = add i32 [[BLOCKSIZE1_01083]], -1 ; CHECK-T1-NEXT: [[CMP3:%.*]] = icmp ult i32 [[COUNT_01084]], 3 ; CHECK-T1-NEXT: [[CMP4:%.*]] = icmp ne i32 [[DEC12]], 0 -; CHECK-T1-NEXT: [[TMP3:%.*]] = and i1 [[CMP4]], [[CMP3]] -; CHECK-T1-NEXT: br i1 [[TMP3]], label [[WHILE_COND5_PREHEADER]], label [[WHILE_END13_LOOPEXIT:%.*]] +; CHECK-T1-NEXT: [[TMP4:%.*]] = and i1 [[CMP4]], [[CMP3]] +; CHECK-T1-NEXT: br i1 [[TMP4]], label [[WHILE_COND5_PREHEADER]], label [[WHILE_END13_LOOPEXIT:%.*]] ; CHECK-T1: while.end13.loopexit: ; CHECK-T1-NEXT: [[INCDEC_PTR11_LCSSA:%.*]] = phi i16* [ [[INCDEC_PTR11]], [[WHILE_END]] ] ; CHECK-T1-NEXT: [[ADD_PTR_LCSSA:%.*]] = phi i16* [ [[ADD_PTR]], [[WHILE_END]] ] ; CHECK-T1-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_END]] ] -; CHECK-T1-NEXT: [[DEC12_LCSSA:%.*]] = phi i32 [ [[DEC12]], [[WHILE_END]] ] +; CHECK-T1-NEXT: [[TMP5:%.*]] = sub i32 [[TMP0]], [[UMIN]] ; CHECK-T1-NEXT: br label [[WHILE_END13]] ; CHECK-T1: while.end13: ; CHECK-T1-NEXT: [[POUT_0_LCSSA:%.*]] = phi i16* [ [[PDST]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR11_LCSSA]], [[WHILE_END13_LOOPEXIT]] ] ; CHECK-T1-NEXT: [[PY_0_LCSSA:%.*]] = phi i16* [ [[PSRCA_PSRCB]], [[ENTRY]] ], [ [[ADD_PTR_LCSSA]], [[WHILE_END13_LOOPEXIT]] ] -; CHECK-T1-NEXT: [[BLOCKSIZE1_0_LCSSA:%.*]] = phi i32 [ [[SUB]], [[ENTRY]] ], [ [[DEC12_LCSSA]], [[WHILE_END13_LOOPEXIT]] ] +; CHECK-T1-NEXT: [[BLOCKSIZE1_0_LCSSA:%.*]] = phi i32 [ [[SUB]], [[ENTRY]] ], [ [[TMP5]], [[WHILE_END13_LOOPEXIT]] ] ; CHECK-T1-NEXT: [[COUNT_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[INC_LCSSA]], [[WHILE_END13_LOOPEXIT]] ] ; CHECK-T1-NEXT: [[CMP161068:%.*]] = icmp eq i32 [[BLOCKSIZE1_0_LCSSA]], 0 ; CHECK-T1-NEXT: br i1 [[CMP161068]], label [[EXIT:%.*]], label [[WHILE_BODY18_PREHEADER:%.*]] @@ -85,34 +87,34 @@ ; CHECK-T1-NEXT: [[PY_31056:%.*]] = phi i16* [ [[ADD_PTR_I884:%.*]], [[WHILE_BODY23]] ], [ [[PY_21070]], [[WHILE_BODY23_PREHEADER]] ] ; CHECK-T1-NEXT: [[PX_31055:%.*]] = phi i16* [ [[ADD_PTR_I890:%.*]], [[WHILE_BODY23]] ], [ [[PSRCB_PSRCA]], [[WHILE_BODY23_PREHEADER]] ] ; CHECK-T1-NEXT: [[ARRAYIDX_I907:%.*]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 1 -; CHECK-T1-NEXT: [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX_I907]], align 2 -; CHECK-T1-NEXT: [[TMP5:%.*]] = load i16, i16* [[PX_31055]], align 2 +; CHECK-T1-NEXT: [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX_I907]], align 2 +; CHECK-T1-NEXT: [[TMP7:%.*]] = load i16, i16* [[PX_31055]], align 2 ; CHECK-T1-NEXT: [[ADD_PTR_I912:%.*]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 2 ; CHECK-T1-NEXT: [[ARRAYIDX_I901:%.*]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 1 -; CHECK-T1-NEXT: [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX_I901]], align 2 -; CHECK-T1-NEXT: [[TMP7:%.*]] = load i16, i16* [[PY_31056]], align 2 +; CHECK-T1-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX_I901]], align 2 +; CHECK-T1-NEXT: [[TMP9:%.*]] = load i16, i16* [[PY_31056]], align 2 ; CHECK-T1-NEXT: [[ADD_PTR_I906:%.*]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 -2 -; CHECK-T1-NEXT: [[SHR_I892:%.*]] = sext i16 [[TMP5]] to i32 -; CHECK-T1-NEXT: [[SHR1_I893:%.*]] = sext i16 [[TMP6]] to i32 +; CHECK-T1-NEXT: [[SHR_I892:%.*]] = sext i16 [[TMP7]] to i32 +; CHECK-T1-NEXT: [[SHR1_I893:%.*]] = sext i16 [[TMP8]] to i32 ; CHECK-T1-NEXT: [[MUL_I894:%.*]] = mul nsw i32 [[SHR1_I893]], [[SHR_I892]] -; CHECK-T1-NEXT: [[SHR2_I895:%.*]] = sext i16 [[TMP4]] to i32 -; CHECK-T1-NEXT: [[SHR4_I897:%.*]] = sext i16 [[TMP7]] to i32 +; CHECK-T1-NEXT: [[SHR2_I895:%.*]] = sext i16 [[TMP6]] to i32 +; CHECK-T1-NEXT: [[SHR4_I897:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-T1-NEXT: [[MUL5_I898:%.*]] = mul nsw i32 [[SHR4_I897]], [[SHR2_I895]] ; CHECK-T1-NEXT: [[ADD_I899:%.*]] = add i32 [[MUL_I894]], [[SUM_11057]] ; CHECK-T1-NEXT: [[ADD6_I900:%.*]] = add i32 [[ADD_I899]], [[MUL5_I898]] ; CHECK-T1-NEXT: [[ARRAYIDX_I885:%.*]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 3 -; CHECK-T1-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX_I885]], align 2 -; CHECK-T1-NEXT: [[TMP9:%.*]] = load i16, i16* [[ADD_PTR_I912]], align 2 +; CHECK-T1-NEXT: [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX_I885]], align 2 +; CHECK-T1-NEXT: [[TMP11:%.*]] = load i16, i16* [[ADD_PTR_I912]], align 2 ; CHECK-T1-NEXT: [[ADD_PTR_I890]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 4 ; CHECK-T1-NEXT: [[ARRAYIDX_I879:%.*]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 -1 -; CHECK-T1-NEXT: [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX_I879]], align 2 -; CHECK-T1-NEXT: [[TMP11:%.*]] = load i16, i16* [[ADD_PTR_I906]], align 2 +; CHECK-T1-NEXT: [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX_I879]], align 2 +; CHECK-T1-NEXT: [[TMP13:%.*]] = load i16, i16* [[ADD_PTR_I906]], align 2 ; CHECK-T1-NEXT: [[ADD_PTR_I884]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 -4 -; CHECK-T1-NEXT: [[SHR_I870:%.*]] = sext i16 [[TMP9]] to i32 -; CHECK-T1-NEXT: [[SHR1_I871:%.*]] = sext i16 [[TMP10]] to i32 +; CHECK-T1-NEXT: [[SHR_I870:%.*]] = sext i16 [[TMP11]] to i32 +; CHECK-T1-NEXT: [[SHR1_I871:%.*]] = sext i16 [[TMP12]] to i32 ; CHECK-T1-NEXT: [[MUL_I872:%.*]] = mul nsw i32 [[SHR1_I871]], [[SHR_I870]] -; CHECK-T1-NEXT: [[SHR2_I873:%.*]] = sext i16 [[TMP8]] to i32 -; CHECK-T1-NEXT: [[SHR4_I875:%.*]] = sext i16 [[TMP11]] to i32 +; CHECK-T1-NEXT: [[SHR2_I873:%.*]] = sext i16 [[TMP10]] to i32 +; CHECK-T1-NEXT: [[SHR4_I875:%.*]] = sext i16 [[TMP13]] to i32 ; CHECK-T1-NEXT: [[MUL5_I876:%.*]] = mul nsw i32 [[SHR4_I875]], [[SHR2_I873]] ; CHECK-T1-NEXT: [[ADD_I877:%.*]] = add i32 [[ADD6_I900]], [[MUL_I872]] ; CHECK-T1-NEXT: [[ADD6_I878]] = add i32 [[ADD_I877]], [[MUL5_I876]] @@ -140,11 +142,11 @@ ; CHECK-T1-NEXT: [[PY_41064:%.*]] = phi i16* [ [[INCDEC_PTR39:%.*]], [[WHILE_BODY36]] ], [ [[ADD_PTR32]], [[WHILE_BODY36_PREHEADER]] ] ; CHECK-T1-NEXT: [[PX_41063:%.*]] = phi i16* [ [[INCDEC_PTR37:%.*]], [[WHILE_BODY36]] ], [ [[PX_3_LCSSA]], [[WHILE_BODY36_PREHEADER]] ] ; CHECK-T1-NEXT: [[INCDEC_PTR37]] = getelementptr inbounds i16, i16* [[PX_41063]], i32 1 -; CHECK-T1-NEXT: [[TMP12:%.*]] = load i16, i16* [[PX_41063]], align 2 -; CHECK-T1-NEXT: [[CONV38:%.*]] = sext i16 [[TMP12]] to i32 +; CHECK-T1-NEXT: [[TMP14:%.*]] = load i16, i16* [[PX_41063]], align 2 +; CHECK-T1-NEXT: [[CONV38:%.*]] = sext i16 [[TMP14]] to i32 ; CHECK-T1-NEXT: [[INCDEC_PTR39]] = getelementptr inbounds i16, i16* [[PY_41064]], i32 -1 -; CHECK-T1-NEXT: [[TMP13:%.*]] = load i16, i16* [[PY_41064]], align 2 -; CHECK-T1-NEXT: [[CONV40:%.*]] = sext i16 [[TMP13]] to i32 +; CHECK-T1-NEXT: [[TMP15:%.*]] = load i16, i16* [[PY_41064]], align 2 +; CHECK-T1-NEXT: [[CONV40:%.*]] = sext i16 [[TMP15]] to i32 ; CHECK-T1-NEXT: [[MUL_I863:%.*]] = mul nsw i32 [[CONV40]], [[CONV38]] ; CHECK-T1-NEXT: [[SHR3_I864:%.*]] = ashr i32 [[CONV38]], 16 ; CHECK-T1-NEXT: [[SHR4_I865:%.*]] = ashr i32 [[CONV40]], 16 @@ -159,8 +161,8 @@ ; CHECK-T1-NEXT: br label [[WHILE_END43]] ; CHECK-T1: while.end43: ; CHECK-T1-NEXT: [[SUM_2_LCSSA:%.*]] = phi i32 [ [[SUM_1_LCSSA]], [[WHILE_END31]] ], [ [[ADD6_I868_LCSSA]], [[WHILE_END43_LOOPEXIT]] ] -; CHECK-T1-NEXT: [[TMP14:%.*]] = lshr i32 [[SUM_2_LCSSA]], 15 -; CHECK-T1-NEXT: [[CONV45:%.*]] = trunc i32 [[TMP14]] to i16 +; CHECK-T1-NEXT: [[TMP16:%.*]] = lshr i32 [[SUM_2_LCSSA]], 15 +; CHECK-T1-NEXT: [[CONV45:%.*]] = trunc i32 [[TMP16]] to i16 ; CHECK-T1-NEXT: [[INCDEC_PTR46]] = getelementptr inbounds i16, i16* [[POUT_11069]], i32 1 ; CHECK-T1-NEXT: store i16 [[CONV45]], i16* [[POUT_11069]], align 2 ; CHECK-T1-NEXT: [[SUB47:%.*]] = add i32 [[COUNT_11072]], -1 @@ -174,7 +176,7 @@ ; CHECK-T1: exit: ; CHECK-T1-NEXT: ret void ; -; CHECK-T2-LABEL: @arm_conv_fast_q15( +; CHECK-T2-LABEL: define {{[^@]+}}@arm_conv_fast_q15( ; CHECK-T2-NEXT: entry: ; CHECK-T2-NEXT: [[CMP:%.*]] = icmp ult i32 [[SRCALEN:%.*]], [[SRCBLEN:%.*]] ; CHECK-T2-NEXT: [[SRCALEN_SRCBLEN:%.*]] = select i1 [[CMP]], i32 [[SRCALEN]], i32 [[SRCBLEN]] @@ -184,6 +186,8 @@ ; CHECK-T2-NEXT: [[CMP41080:%.*]] = icmp eq i32 [[SUB]], 0 ; CHECK-T2-NEXT: br i1 [[CMP41080]], label [[WHILE_END13:%.*]], label [[WHILE_COND5_PREHEADER_PREHEADER:%.*]] ; CHECK-T2: while.cond5.preheader.preheader: +; CHECK-T2-NEXT: [[TMP0:%.*]] = add i32 [[SRCALEN_SRCBLEN]], -2 +; CHECK-T2-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 2) ; CHECK-T2-NEXT: br label [[WHILE_COND5_PREHEADER:%.*]] ; CHECK-T2: while.cond5.preheader: ; CHECK-T2-NEXT: [[COUNT_01084:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_END:%.*]] ], [ 1, [[WHILE_COND5_PREHEADER_PREHEADER]] ] @@ -197,11 +201,11 @@ ; CHECK-T2-NEXT: [[PY_11076:%.*]] = phi i16* [ [[INCDEC_PTR8:%.*]], [[WHILE_BODY7]] ], [ [[PY_01082]], [[WHILE_COND5_PREHEADER]] ] ; CHECK-T2-NEXT: [[PX_11075:%.*]] = phi i16* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY7]] ], [ [[PSRCB_PSRCA]], [[WHILE_COND5_PREHEADER]] ] ; CHECK-T2-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i16, i16* [[PX_11075]], i32 1 -; CHECK-T2-NEXT: [[TMP0:%.*]] = load i16, i16* [[PX_11075]], align 2 -; CHECK-T2-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +; CHECK-T2-NEXT: [[TMP1:%.*]] = load i16, i16* [[PX_11075]], align 2 +; CHECK-T2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 ; CHECK-T2-NEXT: [[INCDEC_PTR8]] = getelementptr inbounds i16, i16* [[PY_11076]], i32 -1 -; CHECK-T2-NEXT: [[TMP1:%.*]] = load i16, i16* [[PY_11076]], align 2 -; CHECK-T2-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32 +; CHECK-T2-NEXT: [[TMP2:%.*]] = load i16, i16* [[PY_11076]], align 2 +; CHECK-T2-NEXT: [[CONV9:%.*]] = sext i16 [[TMP2]] to i32 ; CHECK-T2-NEXT: [[MUL_I:%.*]] = mul nsw i32 [[CONV9]], [[CONV]] ; CHECK-T2-NEXT: [[SHR3_I:%.*]] = ashr i32 [[CONV]], 16 ; CHECK-T2-NEXT: [[SHR4_I:%.*]] = ashr i32 [[CONV9]], 16 @@ -213,8 +217,8 @@ ; CHECK-T2-NEXT: br i1 [[CMP6]], label [[WHILE_END]], label [[WHILE_BODY7]] ; CHECK-T2: while.end: ; CHECK-T2-NEXT: [[ADD6_I_LCSSA:%.*]] = phi i32 [ [[ADD6_I]], [[WHILE_BODY7]] ] -; CHECK-T2-NEXT: [[TMP2:%.*]] = lshr i32 [[ADD6_I_LCSSA]], 15 -; CHECK-T2-NEXT: [[CONV10:%.*]] = trunc i32 [[TMP2]] to i16 +; CHECK-T2-NEXT: [[TMP3:%.*]] = lshr i32 [[ADD6_I_LCSSA]], 15 +; CHECK-T2-NEXT: [[CONV10:%.*]] = trunc i32 [[TMP3]] to i16 ; CHECK-T2-NEXT: [[INCDEC_PTR11]] = getelementptr inbounds i16, i16* [[POUT_01081]], i32 1 ; CHECK-T2-NEXT: store i16 [[CONV10]], i16* [[POUT_01081]], align 2 ; CHECK-T2-NEXT: [[ADD_PTR]] = getelementptr inbounds i16, i16* [[PSRCA_PSRCB]], i32 [[COUNT_01084]] @@ -222,18 +226,18 @@ ; CHECK-T2-NEXT: [[DEC12]] = add i32 [[BLOCKSIZE1_01083]], -1 ; CHECK-T2-NEXT: [[CMP3:%.*]] = icmp ult i32 [[COUNT_01084]], 3 ; CHECK-T2-NEXT: [[CMP4:%.*]] = icmp ne i32 [[DEC12]], 0 -; CHECK-T2-NEXT: [[TMP3:%.*]] = and i1 [[CMP4]], [[CMP3]] -; CHECK-T2-NEXT: br i1 [[TMP3]], label [[WHILE_COND5_PREHEADER]], label [[WHILE_END13_LOOPEXIT:%.*]] +; CHECK-T2-NEXT: [[TMP4:%.*]] = and i1 [[CMP4]], [[CMP3]] +; CHECK-T2-NEXT: br i1 [[TMP4]], label [[WHILE_COND5_PREHEADER]], label [[WHILE_END13_LOOPEXIT:%.*]] ; CHECK-T2: while.end13.loopexit: ; CHECK-T2-NEXT: [[INCDEC_PTR11_LCSSA:%.*]] = phi i16* [ [[INCDEC_PTR11]], [[WHILE_END]] ] ; CHECK-T2-NEXT: [[ADD_PTR_LCSSA:%.*]] = phi i16* [ [[ADD_PTR]], [[WHILE_END]] ] ; CHECK-T2-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_END]] ] -; CHECK-T2-NEXT: [[DEC12_LCSSA:%.*]] = phi i32 [ [[DEC12]], [[WHILE_END]] ] +; CHECK-T2-NEXT: [[TMP5:%.*]] = sub i32 [[TMP0]], [[UMIN]] ; CHECK-T2-NEXT: br label [[WHILE_END13]] ; CHECK-T2: while.end13: ; CHECK-T2-NEXT: [[POUT_0_LCSSA:%.*]] = phi i16* [ [[PDST]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR11_LCSSA]], [[WHILE_END13_LOOPEXIT]] ] ; CHECK-T2-NEXT: [[PY_0_LCSSA:%.*]] = phi i16* [ [[PSRCA_PSRCB]], [[ENTRY]] ], [ [[ADD_PTR_LCSSA]], [[WHILE_END13_LOOPEXIT]] ] -; CHECK-T2-NEXT: [[BLOCKSIZE1_0_LCSSA:%.*]] = phi i32 [ [[SUB]], [[ENTRY]] ], [ [[DEC12_LCSSA]], [[WHILE_END13_LOOPEXIT]] ] +; CHECK-T2-NEXT: [[BLOCKSIZE1_0_LCSSA:%.*]] = phi i32 [ [[SUB]], [[ENTRY]] ], [ [[TMP5]], [[WHILE_END13_LOOPEXIT]] ] ; CHECK-T2-NEXT: [[COUNT_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[INC_LCSSA]], [[WHILE_END13_LOOPEXIT]] ] ; CHECK-T2-NEXT: [[CMP161068:%.*]] = icmp eq i32 [[BLOCKSIZE1_0_LCSSA]], 0 ; CHECK-T2-NEXT: br i1 [[CMP161068]], label [[EXIT:%.*]], label [[WHILE_BODY18_PREHEADER:%.*]] @@ -256,34 +260,34 @@ ; CHECK-T2-NEXT: [[PY_31056:%.*]] = phi i16* [ [[ADD_PTR_I884:%.*]], [[WHILE_BODY23]] ], [ [[PY_21070]], [[WHILE_BODY23_PREHEADER]] ] ; CHECK-T2-NEXT: [[PX_31055:%.*]] = phi i16* [ [[ADD_PTR_I890:%.*]], [[WHILE_BODY23]] ], [ [[PSRCB_PSRCA]], [[WHILE_BODY23_PREHEADER]] ] ; CHECK-T2-NEXT: [[ARRAYIDX_I907:%.*]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 1 -; CHECK-T2-NEXT: [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX_I907]], align 2 -; CHECK-T2-NEXT: [[TMP5:%.*]] = load i16, i16* [[PX_31055]], align 2 +; CHECK-T2-NEXT: [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX_I907]], align 2 +; CHECK-T2-NEXT: [[TMP7:%.*]] = load i16, i16* [[PX_31055]], align 2 ; CHECK-T2-NEXT: [[ADD_PTR_I912:%.*]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 2 ; CHECK-T2-NEXT: [[ARRAYIDX_I901:%.*]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 1 -; CHECK-T2-NEXT: [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX_I901]], align 2 -; CHECK-T2-NEXT: [[TMP7:%.*]] = load i16, i16* [[PY_31056]], align 2 +; CHECK-T2-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX_I901]], align 2 +; CHECK-T2-NEXT: [[TMP9:%.*]] = load i16, i16* [[PY_31056]], align 2 ; CHECK-T2-NEXT: [[ADD_PTR_I906:%.*]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 -2 -; CHECK-T2-NEXT: [[SHR_I892:%.*]] = sext i16 [[TMP5]] to i32 -; CHECK-T2-NEXT: [[SHR1_I893:%.*]] = sext i16 [[TMP6]] to i32 +; CHECK-T2-NEXT: [[SHR_I892:%.*]] = sext i16 [[TMP7]] to i32 +; CHECK-T2-NEXT: [[SHR1_I893:%.*]] = sext i16 [[TMP8]] to i32 ; CHECK-T2-NEXT: [[MUL_I894:%.*]] = mul nsw i32 [[SHR1_I893]], [[SHR_I892]] -; CHECK-T2-NEXT: [[SHR2_I895:%.*]] = sext i16 [[TMP4]] to i32 -; CHECK-T2-NEXT: [[SHR4_I897:%.*]] = sext i16 [[TMP7]] to i32 +; CHECK-T2-NEXT: [[SHR2_I895:%.*]] = sext i16 [[TMP6]] to i32 +; CHECK-T2-NEXT: [[SHR4_I897:%.*]] = sext i16 [[TMP9]] to i32 ; CHECK-T2-NEXT: [[MUL5_I898:%.*]] = mul nsw i32 [[SHR4_I897]], [[SHR2_I895]] ; CHECK-T2-NEXT: [[ADD_I899:%.*]] = add i32 [[MUL_I894]], [[SUM_11057]] ; CHECK-T2-NEXT: [[ADD6_I900:%.*]] = add i32 [[ADD_I899]], [[MUL5_I898]] ; CHECK-T2-NEXT: [[ARRAYIDX_I885:%.*]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 3 -; CHECK-T2-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX_I885]], align 2 -; CHECK-T2-NEXT: [[TMP9:%.*]] = load i16, i16* [[ADD_PTR_I912]], align 2 +; CHECK-T2-NEXT: [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX_I885]], align 2 +; CHECK-T2-NEXT: [[TMP11:%.*]] = load i16, i16* [[ADD_PTR_I912]], align 2 ; CHECK-T2-NEXT: [[ADD_PTR_I890]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 4 ; CHECK-T2-NEXT: [[ARRAYIDX_I879:%.*]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 -1 -; CHECK-T2-NEXT: [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX_I879]], align 2 -; CHECK-T2-NEXT: [[TMP11:%.*]] = load i16, i16* [[ADD_PTR_I906]], align 2 +; CHECK-T2-NEXT: [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX_I879]], align 2 +; CHECK-T2-NEXT: [[TMP13:%.*]] = load i16, i16* [[ADD_PTR_I906]], align 2 ; CHECK-T2-NEXT: [[ADD_PTR_I884]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 -4 -; CHECK-T2-NEXT: [[SHR_I870:%.*]] = sext i16 [[TMP9]] to i32 -; CHECK-T2-NEXT: [[SHR1_I871:%.*]] = sext i16 [[TMP10]] to i32 +; CHECK-T2-NEXT: [[SHR_I870:%.*]] = sext i16 [[TMP11]] to i32 +; CHECK-T2-NEXT: [[SHR1_I871:%.*]] = sext i16 [[TMP12]] to i32 ; CHECK-T2-NEXT: [[MUL_I872:%.*]] = mul nsw i32 [[SHR1_I871]], [[SHR_I870]] -; CHECK-T2-NEXT: [[SHR2_I873:%.*]] = sext i16 [[TMP8]] to i32 -; CHECK-T2-NEXT: [[SHR4_I875:%.*]] = sext i16 [[TMP11]] to i32 +; CHECK-T2-NEXT: [[SHR2_I873:%.*]] = sext i16 [[TMP10]] to i32 +; CHECK-T2-NEXT: [[SHR4_I875:%.*]] = sext i16 [[TMP13]] to i32 ; CHECK-T2-NEXT: [[MUL5_I876:%.*]] = mul nsw i32 [[SHR4_I875]], [[SHR2_I873]] ; CHECK-T2-NEXT: [[ADD_I877:%.*]] = add i32 [[ADD6_I900]], [[MUL_I872]] ; CHECK-T2-NEXT: [[ADD6_I878]] = add i32 [[ADD_I877]], [[MUL5_I876]] @@ -311,11 +315,11 @@ ; CHECK-T2-NEXT: [[PY_41064:%.*]] = phi i16* [ [[INCDEC_PTR39:%.*]], [[WHILE_BODY36]] ], [ [[ADD_PTR32]], [[WHILE_BODY36_PREHEADER]] ] ; CHECK-T2-NEXT: [[PX_41063:%.*]] = phi i16* [ [[INCDEC_PTR37:%.*]], [[WHILE_BODY36]] ], [ [[PX_3_LCSSA]], [[WHILE_BODY36_PREHEADER]] ] ; CHECK-T2-NEXT: [[INCDEC_PTR37]] = getelementptr inbounds i16, i16* [[PX_41063]], i32 1 -; CHECK-T2-NEXT: [[TMP12:%.*]] = load i16, i16* [[PX_41063]], align 2 -; CHECK-T2-NEXT: [[CONV38:%.*]] = sext i16 [[TMP12]] to i32 +; CHECK-T2-NEXT: [[TMP14:%.*]] = load i16, i16* [[PX_41063]], align 2 +; CHECK-T2-NEXT: [[CONV38:%.*]] = sext i16 [[TMP14]] to i32 ; CHECK-T2-NEXT: [[INCDEC_PTR39]] = getelementptr inbounds i16, i16* [[PY_41064]], i32 -1 -; CHECK-T2-NEXT: [[TMP13:%.*]] = load i16, i16* [[PY_41064]], align 2 -; CHECK-T2-NEXT: [[CONV40:%.*]] = sext i16 [[TMP13]] to i32 +; CHECK-T2-NEXT: [[TMP15:%.*]] = load i16, i16* [[PY_41064]], align 2 +; CHECK-T2-NEXT: [[CONV40:%.*]] = sext i16 [[TMP15]] to i32 ; CHECK-T2-NEXT: [[MUL_I863:%.*]] = mul nsw i32 [[CONV40]], [[CONV38]] ; CHECK-T2-NEXT: [[SHR3_I864:%.*]] = ashr i32 [[CONV38]], 16 ; CHECK-T2-NEXT: [[SHR4_I865:%.*]] = ashr i32 [[CONV40]], 16 @@ -330,8 +334,8 @@ ; CHECK-T2-NEXT: br label [[WHILE_END43]] ; CHECK-T2: while.end43: ; CHECK-T2-NEXT: [[SUM_2_LCSSA:%.*]] = phi i32 [ [[SUM_1_LCSSA]], [[WHILE_END31]] ], [ [[ADD6_I868_LCSSA]], [[WHILE_END43_LOOPEXIT]] ] -; CHECK-T2-NEXT: [[TMP14:%.*]] = lshr i32 [[SUM_2_LCSSA]], 15 -; CHECK-T2-NEXT: [[CONV45:%.*]] = trunc i32 [[TMP14]] to i16 +; CHECK-T2-NEXT: [[TMP16:%.*]] = lshr i32 [[SUM_2_LCSSA]], 15 +; CHECK-T2-NEXT: [[CONV45:%.*]] = trunc i32 [[TMP16]] to i16 ; CHECK-T2-NEXT: [[INCDEC_PTR46]] = getelementptr inbounds i16, i16* [[POUT_11069]], i32 1 ; CHECK-T2-NEXT: store i16 [[CONV45]], i16* [[POUT_11069]], align 2 ; CHECK-T2-NEXT: [[SUB47:%.*]] = add i32 [[COUNT_11072]], -1 diff --git a/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll b/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll --- a/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll @@ -7,7 +7,7 @@ ; General case: without extra knowledge, trunc cannot be eliminated. define void @test_00(i64 %start, i32 %n) { ; -; CHECK-LABEL: @test_00( +; CHECK-LABEL: define {{[^@]+}}@test_00( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -34,7 +34,7 @@ define void @test_01(i32 %n) { ; -; CHECK-LABEL: @test_01( +; CHECK-LABEL: define {{[^@]+}}@test_01( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) ; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 @@ -63,7 +63,7 @@ ; Max value at which we can eliminate trunc: SINT_MAX - 1. define void @test_02(i32 %n) { ; -; CHECK-LABEL: @test_02( +; CHECK-LABEL: define {{[^@]+}}@test_02( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 2147483646) ; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 @@ -92,7 +92,7 @@ ; If we start from SINT_MAX then the predicate is always false. define void @test_03(i32 %n) { ; -; CHECK-LABEL: @test_03( +; CHECK-LABEL: define {{[^@]+}}@test_03( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -115,7 +115,7 @@ ; Minimum value at which we can apply the transform: SINT_MIN + 1. define void @test_04(i32 %n) { ; -; CHECK-LABEL: @test_04( +; CHECK-LABEL: define {{[^@]+}}@test_04( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 -2147483647) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SMAX]], 1 @@ -144,7 +144,7 @@ ; FIXME: Harmful LFTR should be thrown away. define void @test_05(i32 %n) { ; -; CHECK-LABEL: @test_05( +; CHECK-LABEL: define {{[^@]+}}@test_05( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -172,7 +172,7 @@ ; Trunc changes the actual value of the IV, so it is invalid to remove it: SINT_MIN - 1. define void @test_06(i32 %n) { ; -; CHECK-LABEL: @test_06( +; CHECK-LABEL: define {{[^@]+}}@test_06( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -194,7 +194,7 @@ ; General case: without extra knowledge, trunc cannot be eliminated. define void @test_00_unsigned(i64 %start, i32 %n) { -; CHECK-LABEL: @test_00_unsigned( +; CHECK-LABEL: define {{[^@]+}}@test_00_unsigned( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -220,7 +220,7 @@ ; FIXME: Harmful LFTR should be thrown away. define void @test_01_unsigned(i32 %n) { -; CHECK-LABEL: @test_01_unsigned( +; CHECK-LABEL: define {{[^@]+}}@test_01_unsigned( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -247,7 +247,7 @@ ; Max value at which we can eliminate trunc: UINT_MAX - 1. define void @test_02_unsigned(i32 %n) { -; CHECK-LABEL: @test_02_unsigned( +; CHECK-LABEL: define {{[^@]+}}@test_02_unsigned( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 -2) ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[UMAX]], 1 @@ -275,7 +275,7 @@ ; If we start from UINT_MAX then the predicate is always false. define void @test_03_unsigned(i32 %n) { -; CHECK-LABEL: @test_03_unsigned( +; CHECK-LABEL: define {{[^@]+}}@test_03_unsigned( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -297,7 +297,7 @@ ; Minimum value at which we can apply the transform: UINT_MIN. define void @test_04_unsigned(i32 %n) { -; CHECK-LABEL: @test_04_unsigned( +; CHECK-LABEL: define {{[^@]+}}@test_04_unsigned( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -324,7 +324,7 @@ ; Start from 1. define void @test_05_unsigned(i32 %n) { -; CHECK-LABEL: @test_05_unsigned( +; CHECK-LABEL: define {{[^@]+}}@test_05_unsigned( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], 1 @@ -352,7 +352,7 @@ ; Trunc changes the actual value of the IV, so it is invalid to remove it: UINT_MIN - 1. define void @test_06_unsigned(i32 %n) { -; CHECK-LABEL: @test_06_unsigned( +; CHECK-LABEL: define {{[^@]+}}@test_06_unsigned( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -374,7 +374,7 @@ ; Do not eliminate trunc if it is used by something different from icmp. define void @test_07(i32* %p, i32 %n) { -; CHECK-LABEL: @test_07( +; CHECK-LABEL: define {{[^@]+}}@test_07( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) ; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 @@ -405,18 +405,21 @@ ; Check that we can eliminate both signed and unsigned compare. define void @test_08(i32 %n) { -; CHECK-LABEL: @test_08( +; CHECK-LABEL: define {{[^@]+}}@test_08( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N:%.*]] to i64 -; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N]] to i64 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[SMAX]], -1 +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = add nuw i32 [[UMIN]], 2 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP2]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[IV]], [[SEXT]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IV]], [[ZEXT]] -; CHECK-NEXT: [[CMP:%.*]] = and i1 [[TMP0]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -436,7 +439,7 @@ ; Widen NE as unsigned. define void @test_09(i32 %n) { -; CHECK-LABEL: @test_09( +; CHECK-LABEL: define {{[^@]+}}@test_09( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N:%.*]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -462,7 +465,7 @@ ; Widen NE as signed. define void @test_10(i32 %n) { -; CHECK-LABEL: @test_10( +; CHECK-LABEL: define {{[^@]+}}@test_10( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 100 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 @@ -492,7 +495,7 @@ } define void @test_11() { -; CHECK-LABEL: @test_11( +; CHECK-LABEL: define {{[^@]+}}@test_11( ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br i1 undef, label [[BB2:%.*]], label [[BB6:%.*]] @@ -541,7 +544,7 @@ ; Show that we can turn signed comparison to unsigned and use zext while ; comparing non-negative values. define void @test_12(i32* %p) { -; CHECK-LABEL: @test_12( +; CHECK-LABEL: define {{[^@]+}}@test_12( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[N:%.*]] = load i32, i32* [[P:%.*]], align 4, !range [[RNG0:![0-9]+]] ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 1) @@ -570,7 +573,7 @@ define void @test_13a(i32 %n) { ; -; CHECK-LABEL: @test_13a( +; CHECK-LABEL: define {{[^@]+}}@test_13a( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 1024 to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -596,7 +599,7 @@ define void @test_13b(i32 %n) { ; -; CHECK-LABEL: @test_13b( +; CHECK-LABEL: define {{[^@]+}}@test_13b( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 1024 to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -622,7 +625,7 @@ define void @test_13c(i32 %n) { ; -; CHECK-LABEL: @test_13c( +; CHECK-LABEL: define {{[^@]+}}@test_13c( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 1024 to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -648,7 +651,7 @@ define void @test_13d(i32 %n) { ; -; CHECK-LABEL: @test_13d( +; CHECK-LABEL: define {{[^@]+}}@test_13d( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SEXT:%.*]] = sext i32 1024 to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] diff --git a/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll b/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll --- a/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll @@ -4,7 +4,7 @@ target triple = "x86_64-unknown-linux-gnu" define void @test1(i64 %start) { -; CHECK-LABEL: @test1( +; CHECK-LABEL: define {{[^@]+}}@test1( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -27,7 +27,7 @@ } define void @test1.next(i64 %start) { -; CHECK-LABEL: @test1.next( +; CHECK-LABEL: define {{[^@]+}}@test1.next( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[START:%.*]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -51,7 +51,7 @@ } define void @test2(i64 %start) { -; CHECK-LABEL: @test2( +; CHECK-LABEL: define {{[^@]+}}@test2( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -74,7 +74,7 @@ } define void @test2.next(i64 %start) { -; CHECK-LABEL: @test2.next( +; CHECK-LABEL: define {{[^@]+}}@test2.next( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[START:%.*]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -99,7 +99,7 @@ ; As long as the test dominates the backedge, we're good define void @test3(i64 %start) { -; CHECK-LABEL: @test3( +; CHECK-LABEL: define {{[^@]+}}@test3( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -134,7 +134,7 @@ } define void @test3.next(i64 %start) { -; CHECK-LABEL: @test3.next( +; CHECK-LABEL: define {{[^@]+}}@test3.next( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[START:%.*]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -171,7 +171,7 @@ define void @test4(i64 %start) { -; CHECK-LABEL: @test4( +; CHECK-LABEL: define {{[^@]+}}@test4( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -206,7 +206,7 @@ } define void @test4.next(i64 %start) { -; CHECK-LABEL: @test4.next( +; CHECK-LABEL: define {{[^@]+}}@test4.next( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[START:%.*]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -242,7 +242,7 @@ } define void @test5(i64 %start) { -; CHECK-LABEL: @test5( +; CHECK-LABEL: define {{[^@]+}}@test5( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -277,7 +277,7 @@ } define void @test5.next(i64 %start) { -; CHECK-LABEL: @test5.next( +; CHECK-LABEL: define {{[^@]+}}@test5.next( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[START:%.*]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -314,7 +314,7 @@ define void @test6(i64 %start) { -; CHECK-LABEL: @test6( +; CHECK-LABEL: define {{[^@]+}}@test6( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -349,7 +349,7 @@ } define void @test6.next(i64 %start) { -; CHECK-LABEL: @test6.next( +; CHECK-LABEL: define {{[^@]+}}@test6.next( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[START:%.*]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -385,7 +385,7 @@ } define void @test7(i64 %start, i64* %inc_ptr) { -; CHECK-LABEL: @test7( +; CHECK-LABEL: define {{[^@]+}}@test7( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, !range [[RNG0:![0-9]+]] ; CHECK-NEXT: [[OK:%.*]] = icmp sge i64 [[INC]], 0 @@ -416,7 +416,7 @@ } define void @test7.next(i64 %start, i64* %inc_ptr) { -; CHECK-LABEL: @test7.next( +; CHECK-LABEL: define {{[^@]+}}@test7.next( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, !range [[RNG0]] ; CHECK-NEXT: [[OK:%.*]] = icmp sge i64 [[INC]], 0 @@ -450,7 +450,7 @@ ; Negative test - we can't show that the internal branch executes, so we can't ; fold the test to a loop invariant one. define void @test1_neg(i64 %start) { -; CHECK-LABEL: @test1_neg( +; CHECK-LABEL: define {{[^@]+}}@test1_neg( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -493,7 +493,7 @@ ; Slightly subtle version of @test4 where the icmp dominates the backedge, ; but the exit branch doesn't. define void @test2_neg(i64 %start) { -; CHECK-LABEL: @test2_neg( +; CHECK-LABEL: define {{[^@]+}}@test2_neg( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -535,14 +535,16 @@ ; The branch has to exit the loop if the condition is true define void @test3_neg(i64 %start) { -; CHECK-LABEL: @test3_neg( +; CHECK-LABEL: define {{[^@]+}}@test3_neg( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[START:%.*]], i64 -1) +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[SMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -560,18 +562,20 @@ } define void @test4_neg(i64 %start) { -; CHECK-LABEL: @test4_neg( +; CHECK-LABEL: define {{[^@]+}}@test4_neg( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[START:%.*]], i64 0) +; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[SMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 25 ; CHECK-NEXT: br i1 [[CMP]], label [[BACKEDGE]], label [[FOR_END:%.*]] ; CHECK: backedge: ; CHECK-NEXT: call void @foo() -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_END]], label [[LOOP]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[LOOP]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -598,7 +602,7 @@ } define void @test5_neg(i64 %start, i64 %inc) { -; CHECK-LABEL: @test5_neg( +; CHECK-LABEL: define {{[^@]+}}@test5_neg( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -623,7 +627,7 @@ } define void @test8(i64 %start, i64* %inc_ptr) { -; CHECK-LABEL: @test8( +; CHECK-LABEL: define {{[^@]+}}@test8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, !range [[RNG1:![0-9]+]] ; CHECK-NEXT: [[OK:%.*]] = icmp sge i64 [[INC]], 0 @@ -658,7 +662,7 @@ ; check to handle loops without preheaders, but invariant operands ; (we handle this today by inserting a preheader) define void @test9(i1 %cnd, i64 %start) { -; CHECK-LABEL: @test9( +; CHECK-LABEL: define {{[^@]+}}@test9( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[CND:%.*]], label [[ENTRY1:%.*]], label [[ENTRY2:%.*]] ; CHECK: entry1: @@ -696,7 +700,7 @@ ; we have a "loop" which is known to run exactly one iteration but ; haven't yet simplified the uses of the IV define void @test10() { -; CHECK-LABEL: @test10( +; CHECK-LABEL: define {{[^@]+}}@test10( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -740,7 +744,7 @@ ; check that we can figure out that iv.next > 1 from the facts that iv >= 0 and ; iv.start != 0. define void @test11(i64* %inc_ptr) { -; CHECK-LABEL: @test11( +; CHECK-LABEL: define {{[^@]+}}@test11( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, !range [[RNG0]] ; CHECK-NEXT: [[NE_COND:%.*]] = icmp ne i64 [[INC]], 0 @@ -791,7 +795,7 @@ ; check that we can prove that a recurrency is greater than another recurrency ; in the same loop, with the same step, and with smaller starting value. define void @test12(i64* %inc_ptr) { -; CHECK-LABEL: @test12( +; CHECK-LABEL: define {{[^@]+}}@test12( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, !range [[RNG0]] ; CHECK-NEXT: br label [[LOOP:%.*]] diff --git a/llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll b/llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll --- a/llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll +++ b/llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll @@ -10,7 +10,7 @@ declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) define i32 @_Z3fooPKcjj(i8* nocapture readonly %s, i32 %len, i32 %c) { -; CHECK-LABEL: @_Z3fooPKcjj( +; CHECK-LABEL: define {{[^@]+}}@_Z3fooPKcjj( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[T:%.*]] = bitcast i32* [[A]] to i8* @@ -19,6 +19,9 @@ ; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[LEN:%.*]], 11 ; CHECK-NEXT: br i1 [[CMP8]], label [[WHILE_BODY_LR_PH:%.*]], label [[WHILE_END:%.*]] ; CHECK: while.body.lr.ph: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], -12 +; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[TMP0]], 12 +; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i32 [[TMP1]], 12 ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[KEYLEN_010:%.*]] = phi i32 [ [[LEN]], [[WHILE_BODY_LR_PH]] ], [ [[SUB:%.*]], [[WHILE_BODY]] ] @@ -36,10 +39,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SUB]], 11 ; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_COND_WHILE_END_CRIT_EDGE:%.*]] ; CHECK: while.cond.while.end_crit_edge: -; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i32 [ [[SUB]], [[WHILE_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[TMP2]] ; CHECK-NEXT: br label [[WHILE_END]] ; CHECK: while.end: -; CHECK-NEXT: [[KEYLEN_0_LCSSA:%.*]] = phi i32 [ [[SUB_LCSSA]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ [[LEN]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[KEYLEN_0_LCSSA:%.*]] = phi i32 [ [[TMP3]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ [[LEN]], [[ENTRY:%.*]] ] ; CHECK-NEXT: call void @_Z3mixRjj(i32* dereferenceable(4) [[A]], i32 [[KEYLEN_0_LCSSA]]) ; CHECK-NEXT: [[T4:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[T]]) @@ -85,7 +88,7 @@ } define i32 @zero_backedge_count_test(i32 %unknown_init, i32* %unknown_mem) { -; CHECK-LABEL: @zero_backedge_count_test( +; CHECK-LABEL: define {{[^@]+}}@zero_backedge_count_test( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: diff --git a/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll --- a/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll +++ b/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll @@ -9,7 +9,7 @@ ; We should only have 2 IVs. ; sext should be eliminated while preserving gep inboundsness. define i32 @sum(i32* %arr, i32 %n) nounwind { -; CHECK-LABEL: @sum( +; CHECK-LABEL: define {{[^@]+}}@sum( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[PRECOND:%.*]] = icmp slt i32 0, [[N:%.*]] ; CHECK-NEXT: br i1 [[PRECOND]], label [[PH:%.*]], label [[RETURN:%.*]] @@ -63,7 +63,7 @@ ; %ofs sext should be eliminated while preserving gep inboundsness. ; %vall sext should obviously not be eliminated define i64 @suml(i32* %arr, i32 %n) nounwind { -; CHECK-LABEL: @suml( +; CHECK-LABEL: define {{[^@]+}}@suml( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[PRECOND:%.*]] = icmp slt i32 0, [[N:%.*]] ; CHECK-NEXT: br i1 [[PRECOND]], label [[PH:%.*]], label [[RETURN:%.*]] @@ -120,7 +120,7 @@ ; Don't create any extra adds. ; Preserve gep inboundsness, and don't factor it. define void @outofbounds(i32* %first, i32* %last, i32 %idx) nounwind { -; CHECK-LABEL: @outofbounds( +; CHECK-LABEL: define {{[^@]+}}@outofbounds( ; CHECK-NEXT: [[PRECOND:%.*]] = icmp ne i32* [[FIRST:%.*]], [[LAST:%.*]] ; CHECK-NEXT: br i1 [[PRECOND]], label [[PH:%.*]], label [[RETURN:%.*]] ; CHECK: ph: @@ -164,7 +164,7 @@ ; Preserve casts define void @bitcastiv(i32 %start, i32 %limit, i32 %step, %structI* %base) -; CHECK-LABEL: @bitcastiv( +; CHECK-LABEL: define {{[^@]+}}@bitcastiv( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -204,7 +204,7 @@ ; Test inserting a truncate at a phi use. define void @maxvisitor(i32 %limit, i32* %base) nounwind { -; CHECK-LABEL: @maxvisitor( +; CHECK-LABEL: define {{[^@]+}}@maxvisitor( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 1) ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 @@ -260,7 +260,7 @@ ; Test an edge case of removing an identity phi that directly feeds ; back to the loop iv. define void @identityphi(i32 %limit) nounwind { -; CHECK-LABEL: @identityphi( +; CHECK-LABEL: define {{[^@]+}}@identityphi( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -294,23 +294,27 @@ ; Test cloning an or, which is not an OverflowBinaryOperator. define i64 @cloneOr(i32 %limit, i64* %base) nounwind { -; CHECK-LABEL: @cloneOr( +; CHECK-LABEL: define {{[^@]+}}@cloneOr( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[HALFLIM:%.*]] = ashr i32 [[LIMIT:%.*]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[HALFLIM]] to i64 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[HALFLIM]], i32 2) +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[SMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[ADR:%.*]] = getelementptr i64, i64* [[BASE:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[VAL:%.*]] = load i64, i64* [[ADR]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: [[VAL_LCSSA:%.*]] = phi i64 [ [[VAL]], [[LOOP]] ] -; CHECK-NEXT: [[T3_LCSSA:%.*]] = phi i64 [ [[TMP1]], [[LOOP]] ] -; CHECK-NEXT: [[RESULT:%.*]] = and i64 [[VAL_LCSSA]], [[T3_LCSSA]] +; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1 +; CHECK-NEXT: [[RESULT:%.*]] = and i64 [[VAL_LCSSA]], [[TMP5]] ; CHECK-NEXT: ret i64 [[RESULT]] ; entry: @@ -338,7 +342,7 @@ ; a simple affine IV. Make sure that indvars simplifies through. ; ReplaceLoopExitValue should fold the return value to constant 9. define i32 @indirectRecurrence() nounwind { -; CHECK-LABEL: @indirectRecurrence( +; CHECK-LABEL: define {{[^@]+}}@indirectRecurrence( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -374,7 +378,7 @@ ; Two increments should remain, one by %step and one by %step1. ; Five live-outs should remain. define i32 @isomorphic(i32 %init, i32 %step, i32 %lim) nounwind { -; CHECK-LABEL: @isomorphic( +; CHECK-LABEL: define {{[^@]+}}@isomorphic( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[STEP1:%.*]] = add i32 [[STEP:%.*]], 1 ; CHECK-NEXT: [[INIT1:%.*]] = add i32 [[INIT:%.*]], [[STEP1]] @@ -432,7 +436,7 @@ %structIF = type { i32, float } define void @congruentgepiv(%structIF* %base) nounwind uwtable ssp { -; CHECK-LABEL: @congruentgepiv( +; CHECK-LABEL: define {{[^@]+}}@congruentgepiv( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -468,7 +472,7 @@ ; Test a widened IV that is used by a phi on different paths within the loop. define void @phiUsesTrunc() nounwind { -; CHECK-LABEL: @phiUsesTrunc( +; CHECK-LABEL: define {{[^@]+}}@phiUsesTrunc( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 undef, label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: diff --git a/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll b/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll --- a/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll +++ b/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll @@ -8,7 +8,7 @@ ; In order to do this indvars need to prove that the narrow IV def (%i.inc) ; is not-negative from the range check inside of the loop. define void @test(i32* %base, i32 %limit, i32 %start) { -; CHECK-LABEL: @test( +; CHECK-LABEL: define {{[^@]+}}@test( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[START:%.*]], i32 64) ; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[UMAX]], [[START]] @@ -60,7 +60,7 @@ } define void @test_false_edge(i32* %base, i32 %limit, i32 %start) { -; CHECK-LABEL: @test_false_edge( +; CHECK-LABEL: define {{[^@]+}}@test_false_edge( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[START:%.*]], i32 65) ; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[UMAX]], [[START]] @@ -112,10 +112,11 @@ } define void @test_range_metadata(i32* %array_length_ptr, i32* %base, -; CHECK-LABEL: @test_range_metadata( +; CHECK-LABEL: define {{[^@]+}}@test_range_metadata( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ] @@ -127,8 +128,9 @@ ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -166,7 +168,7 @@ ; Negative version of the test above, we don't know anything about ; array_length_ptr range. define void @test_neg(i32* %array_length_ptr, i32* %base, -; CHECK-LABEL: @test_neg( +; CHECK-LABEL: define {{[^@]+}}@test_neg( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -218,13 +220,14 @@ } define void @test_transitive_use(i32* %base, i32 %limit, i32 %start) { -; CHECK-LABEL: @test_transitive_use( +; CHECK-LABEL: define {{[^@]+}}@test_transitive_use( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[LIMIT]] to i64 ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[START]], i32 64) ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[UMAX]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT]], i32 [[TMP2]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ] @@ -242,8 +245,9 @@ ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP2]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND3:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND3]], label [[FOR_BODY]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -290,18 +294,20 @@ declare void @llvm.experimental.guard(i1, ...) define void @test_guard_one_bb(i32* %base, i32 %limit, i32 %start) { -; CHECK-LABEL: @test_guard_one_bb( +; CHECK-LABEL: define {{[^@]+}}@test_guard_one_bb( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ] ; CHECK-NEXT: [[WITHIN_LIMITS:%.*]] = icmp ult i64 [[INDVARS_IV]], 64 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_LIMITS]]) [ "deopt"() ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -330,10 +336,11 @@ } define void @test_guard_in_the_same_bb(i32* %base, i32 %limit, i32 %start) { -; CHECK-LABEL: @test_guard_in_the_same_bb( +; CHECK-LABEL: define {{[^@]+}}@test_guard_in_the_same_bb( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ] @@ -342,8 +349,9 @@ ; CHECK: for.inc: ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_LIMITS]]) [ "deopt"() ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -375,10 +383,11 @@ } define void @test_guard_in_idom(i32* %base, i32 %limit, i32 %start) { -; CHECK-LABEL: @test_guard_in_idom( +; CHECK-LABEL: define {{[^@]+}}@test_guard_in_idom( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ] @@ -387,8 +396,9 @@ ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -420,10 +430,11 @@ } define void @test_guard_merge_ranges(i32* %base, i32 %limit, i32 %start) { -; CHECK-LABEL: @test_guard_merge_ranges( +; CHECK-LABEL: define {{[^@]+}}@test_guard_merge_ranges( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ] @@ -432,8 +443,9 @@ ; CHECK-NEXT: [[WITHIN_LIMITS_2:%.*]] = icmp ult i64 [[INDVARS_IV]], 2147483647 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_LIMITS_2]]) [ "deopt"() ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: diff --git a/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll b/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll --- a/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll +++ b/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll @@ -4,7 +4,7 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" define i32 @remove_loop(i32 %size) { -; CHECK-LABEL: @remove_loop( +; CHECK-LABEL: define {{[^@]+}}@remove_loop( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31 ; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31) @@ -36,18 +36,23 @@ } define i32 @used_loop(i32 %size) minsize { -; CHECK-LABEL: @used_loop( +; CHECK-LABEL: define {{[^@]+}}@used_loop( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 5 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5 ; CHECK-NEXT: br label [[WHILE_COND:%.*]] ; CHECK: while.cond: -; CHECK-NEXT: [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE:%.*]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_COND]] ] +; CHECK-NEXT: [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_COND]] ] ; CHECK-NEXT: tail call void @call() ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SIZE_ADDR_0]], 31 ; CHECK-NEXT: [[SUB]] = add i32 [[SIZE_ADDR_0]], -32 ; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_COND]], label [[WHILE_END:%.*]] ; CHECK: while.end: -; CHECK-NEXT: [[SIZE_LCSSA:%.*]] = phi i32 [ [[SIZE_ADDR_0]], [[WHILE_COND]] ] -; CHECK-NEXT: ret i32 [[SIZE_LCSSA]] +; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]] +; CHECK-NEXT: ret i32 [[TMP4]] ; entry: br label %while.cond @@ -66,11 +71,16 @@ define i32 @test_signed_while(i32 %S) { -; CHECK-LABEL: @test_signed_while( +; CHECK-LABEL: define {{[^@]+}}@test_signed_while( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[S:%.*]], 31 +; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[S]], i32 31) +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 5 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5 ; CHECK-NEXT: br label [[WHILE_COND:%.*]] ; CHECK: while.cond: -; CHECK-NEXT: [[S_ADDR_0:%.*]] = phi i32 [ [[S:%.*]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_BODY:%.*]] ] +; CHECK-NEXT: [[S_ADDR_0:%.*]] = phi i32 [ [[S]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_BODY:%.*]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[S_ADDR_0]], 31 ; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_END:%.*]] ; CHECK: while.body: @@ -78,8 +88,8 @@ ; CHECK-NEXT: tail call void @call() ; CHECK-NEXT: br label [[WHILE_COND]] ; CHECK: while.end: -; CHECK-NEXT: [[S_ADDR_0_LCSSA:%.*]] = phi i32 [ [[S_ADDR_0]], [[WHILE_COND]] ] -; CHECK-NEXT: ret i32 [[S_ADDR_0_LCSSA]] +; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[S]], [[TMP3]] +; CHECK-NEXT: ret i32 [[TMP4]] ; entry: br label %while.cond @@ -100,7 +110,7 @@ } define i32 @test_signed_do(i32 %S) { -; CHECK-LABEL: @test_signed_do( +; CHECK-LABEL: define {{[^@]+}}@test_signed_do( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[DO_BODY:%.*]] ; CHECK: do.body: @@ -129,11 +139,16 @@ } define i32 @test_unsigned_while(i32 %S) { -; CHECK-LABEL: @test_unsigned_while( +; CHECK-LABEL: define {{[^@]+}}@test_unsigned_while( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[S:%.*]], 15 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[S]], i32 15) +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 4 ; CHECK-NEXT: br label [[WHILE_COND:%.*]] ; CHECK: while.cond: -; CHECK-NEXT: [[S_ADDR_0:%.*]] = phi i32 [ [[S:%.*]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_BODY:%.*]] ] +; CHECK-NEXT: [[S_ADDR_0:%.*]] = phi i32 [ [[S]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_BODY:%.*]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[S_ADDR_0]], 15 ; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_END:%.*]] ; CHECK: while.body: @@ -141,8 +156,8 @@ ; CHECK-NEXT: tail call void @call() ; CHECK-NEXT: br label [[WHILE_COND]] ; CHECK: while.end: -; CHECK-NEXT: [[S_ADDR_0_LCSSA:%.*]] = phi i32 [ [[S_ADDR_0]], [[WHILE_COND]] ] -; CHECK-NEXT: ret i32 [[S_ADDR_0_LCSSA]] +; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[S]], [[TMP3]] +; CHECK-NEXT: ret i32 [[TMP4]] ; entry: br label %while.cond @@ -163,7 +178,7 @@ } define i32 @test_unsigned_do(i32 %S) { -; CHECK-LABEL: @test_unsigned_do( +; CHECK-LABEL: define {{[^@]+}}@test_unsigned_do( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[DO_BODY:%.*]] ; CHECK: do.body: diff --git a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll --- a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll +++ b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll @@ -1,24 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes=indvars -S %s -o - | FileCheck %s ; When bailing out in rewriteLoopExitValues() you would be left with a PHI node ; that was not deleted, and the IndVar pass would return an incorrect modified ; status. This was caught by the expensive check introduced in D86589. -; CHECK-LABEL: header: -; CHECK-NEXT: %idx = phi i64 [ %idx.next, %latch ], [ undef, %entry ] -; CHECK-NEXT: %cond = icmp sgt i64 %n, %idx -; CHECK-NEXT: br i1 %cond, label %end, label %inner.preheader - -; CHECK-LABEL: latch: -; CHECK-NEXT: %idx.next = add nsw i64 %idx, -1 -; CHECK-NEXT: br label %header - target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @ptr = external global i64 define dso_local void @hoge() local_unnamed_addr { +; CHECK-LABEL: define {{[^@]+}}@hoge( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[N:%.*]] = sdiv exact i64 undef, 40 +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 undef, [[N]] +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], [[LATCH]] ], [ undef, [[ENTRY]] ] +; CHECK-NEXT: [[COND:%.*]] = icmp sgt i64 [[N]], [[IDX]] +; CHECK-NEXT: br i1 [[COND]], label [[END:%.*]], label [[INNER_PREHEADER:%.*]] +; CHECK: inner.preheader: +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[INNER]] ], [ 0, [[INNER_PREHEADER]] ] +; CHECK-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1 +; CHECK-NEXT: store i64 undef, i64* @ptr, align 8 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[I_NEXT]], [[INDVARS_IV]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[INNER_EXIT:%.*]] +; CHECK: inner_exit: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[I_NEXT]], [[INNER]] ] +; CHECK-NEXT: [[INDVAR_USE:%.*]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: br label [[LATCH]] +; CHECK: latch: +; CHECK-NEXT: [[IDX_NEXT]] = add nsw i64 [[IDX]], -1 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: br label [[HEADER]] +; CHECK: end: +; CHECK-NEXT: ret void +; entry: ; preds = %entry %n = sdiv exact i64 undef, 40 br label %header diff --git a/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll b/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll --- a/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll +++ b/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll @@ -11,12 +11,46 @@ ; CHECK-NEXT: [[I5:%.*]] = icmp sgt i64 [[I4]], [[I2]] ; CHECK-NEXT: br i1 [[I5]], label [[BB10:%.*]], label [[BB6_PREHEADER:%.*]] ; CHECK: bb6.preheader: +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[I4]], i64 [[I2]]) +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[SMAX]], [[I3]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[SMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[I3]] +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7 +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[BB6_PROL_PREHEADER:%.*]], label [[BB6_PROL_LOOPEXIT:%.*]] +; CHECK: bb6.prol.preheader: +; CHECK-NEXT: br label [[BB6_PROL:%.*]] +; CHECK: bb6.prol: +; CHECK-NEXT: [[I7_PROL:%.*]] = phi i64 [ [[I8_PROL:%.*]], [[BB6_PROL]] ], [ [[I4]], [[BB6_PROL_PREHEADER]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, [[BB6_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[BB6_PROL]] ] +; CHECK-NEXT: [[I8_PROL]] = add i64 [[I7_PROL]], 1 +; CHECK-NEXT: [[I9_PROL:%.*]] = icmp slt i64 [[I7_PROL]], [[I2]] +; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 +; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[BB6_PROL]], label [[BB6_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: bb6.prol.loopexit.unr-lcssa: +; CHECK-NEXT: [[I7_UNR_PH:%.*]] = phi i64 [ [[I8_PROL]], [[BB6_PROL]] ] +; CHECK-NEXT: br label [[BB6_PROL_LOOPEXIT]] +; CHECK: bb6.prol.loopexit: +; CHECK-NEXT: [[I7_UNR:%.*]] = phi i64 [ [[I4]], [[BB6_PREHEADER]] ], [ [[I7_UNR_PH]], [[BB6_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 7 +; CHECK-NEXT: br i1 [[TMP3]], label [[BB10_LOOPEXIT:%.*]], label [[BB6_PREHEADER_NEW:%.*]] +; CHECK: bb6.preheader.new: ; CHECK-NEXT: br label [[BB6:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[I7:%.*]] = phi i64 [ [[I8:%.*]], [[BB6]] ], [ [[I4]], [[BB6_PREHEADER]] ] -; CHECK-NEXT: [[I8]] = add i64 [[I7]], 1 -; CHECK-NEXT: [[I9:%.*]] = icmp slt i64 [[I7]], [[I2]] -; CHECK-NEXT: br i1 [[I9]], label [[BB6]], label [[BB10_LOOPEXIT:%.*]] +; CHECK-NEXT: [[I7:%.*]] = phi i64 [ [[I7_UNR]], [[BB6_PREHEADER_NEW]] ], [ [[I8_7:%.*]], [[BB6]] ] +; CHECK-NEXT: [[I8:%.*]] = add i64 [[I7]], 1 +; CHECK-NEXT: [[I8_1:%.*]] = add i64 [[I8]], 1 +; CHECK-NEXT: [[I8_2:%.*]] = add i64 [[I8_1]], 1 +; CHECK-NEXT: [[I8_3:%.*]] = add i64 [[I8_2]], 1 +; CHECK-NEXT: [[I8_4:%.*]] = add i64 [[I8_3]], 1 +; CHECK-NEXT: [[I8_5:%.*]] = add i64 [[I8_4]], 1 +; CHECK-NEXT: [[I8_6:%.*]] = add i64 [[I8_5]], 1 +; CHECK-NEXT: [[I8_7]] = add i64 [[I8_6]], 1 +; CHECK-NEXT: [[I9_7:%.*]] = icmp slt i64 [[I8_6]], [[I2]] +; CHECK-NEXT: br i1 [[I9_7]], label [[BB6]], label [[BB10_LOOPEXIT_UNR_LCSSA:%.*]] +; CHECK: bb10.loopexit.unr-lcssa: +; CHECK-NEXT: br label [[BB10_LOOPEXIT]] ; CHECK: bb10.loopexit: ; CHECK-NEXT: br label [[BB10]] ; CHECK: bb10: @@ -51,12 +85,46 @@ ; CHECK-NEXT: [[I5:%.*]] = icmp sgt i64 [[I4]], [[I2]] ; CHECK-NEXT: br i1 [[I5]], label [[BB10:%.*]], label [[BB6_PREHEADER:%.*]] ; CHECK: bb6.preheader: +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[I4]], i64 [[I2]]) +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[SMAX]], [[I3]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[SMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[I3]] +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7 +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[BB6_PROL_PREHEADER:%.*]], label [[BB6_PROL_LOOPEXIT:%.*]] +; CHECK: bb6.prol.preheader: +; CHECK-NEXT: br label [[BB6_PROL:%.*]] +; CHECK: bb6.prol: +; CHECK-NEXT: [[I7_PROL:%.*]] = phi i64 [ [[I8_PROL:%.*]], [[BB6_PROL]] ], [ [[I4]], [[BB6_PROL_PREHEADER]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, [[BB6_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[BB6_PROL]] ] +; CHECK-NEXT: [[I8_PROL]] = add i64 [[I7_PROL]], 1 +; CHECK-NEXT: [[I9_PROL:%.*]] = icmp slt i64 [[I7_PROL]], [[I2]] +; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 +; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[BB6_PROL]], label [[BB6_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: bb6.prol.loopexit.unr-lcssa: +; CHECK-NEXT: [[I7_UNR_PH:%.*]] = phi i64 [ [[I8_PROL]], [[BB6_PROL]] ] +; CHECK-NEXT: br label [[BB6_PROL_LOOPEXIT]] +; CHECK: bb6.prol.loopexit: +; CHECK-NEXT: [[I7_UNR:%.*]] = phi i64 [ [[I4]], [[BB6_PREHEADER]] ], [ [[I7_UNR_PH]], [[BB6_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 7 +; CHECK-NEXT: br i1 [[TMP3]], label [[BB10_LOOPEXIT:%.*]], label [[BB6_PREHEADER_NEW:%.*]] +; CHECK: bb6.preheader.new: ; CHECK-NEXT: br label [[BB6:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[I7:%.*]] = phi i64 [ [[I8:%.*]], [[BB6]] ], [ [[I4]], [[BB6_PREHEADER]] ] -; CHECK-NEXT: [[I8]] = add i64 [[I7]], 1 -; CHECK-NEXT: [[I9:%.*]] = icmp slt i64 [[I7]], [[I2]] -; CHECK-NEXT: br i1 [[I9]], label [[BB6]], label [[BB10_LOOPEXIT:%.*]] +; CHECK-NEXT: [[I7:%.*]] = phi i64 [ [[I7_UNR]], [[BB6_PREHEADER_NEW]] ], [ [[I8_7:%.*]], [[BB6]] ] +; CHECK-NEXT: [[I8:%.*]] = add nuw nsw i64 [[I7]], 1 +; CHECK-NEXT: [[I8_1:%.*]] = add nuw nsw i64 [[I8]], 1 +; CHECK-NEXT: [[I8_2:%.*]] = add nuw nsw i64 [[I8_1]], 1 +; CHECK-NEXT: [[I8_3:%.*]] = add nuw nsw i64 [[I8_2]], 1 +; CHECK-NEXT: [[I8_4:%.*]] = add nuw nsw i64 [[I8_3]], 1 +; CHECK-NEXT: [[I8_5:%.*]] = add nuw nsw i64 [[I8_4]], 1 +; CHECK-NEXT: [[I8_6:%.*]] = add nuw nsw i64 [[I8_5]], 1 +; CHECK-NEXT: [[I8_7]] = add nuw nsw i64 [[I8_6]], 1 +; CHECK-NEXT: [[I9_7:%.*]] = icmp slt i64 [[I8_6]], [[I2]] +; CHECK-NEXT: br i1 [[I9_7]], label [[BB6]], label [[BB10_LOOPEXIT_UNR_LCSSA:%.*]] +; CHECK: bb10.loopexit.unr-lcssa: +; CHECK-NEXT: br label [[BB10_LOOPEXIT]] ; CHECK: bb10.loopexit: ; CHECK-NEXT: br label [[BB10]] ; CHECK: bb10: diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll @@ -4,44 +4,47 @@ ; PR38280 / Issue #37628 define void @apply_delta(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i64 noundef %neg_offs, i64 noundef %count) { -; CHECK-LABEL: @apply_delta( +; CHECK-LABEL: define {{[^@]+}}@apply_delta( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP21:%.*]] = icmp ugt i64 [[COUNT:%.*]], 7 ; CHECK-NEXT: br i1 [[CMP21]], label [[WHILE_BODY:%.*]], label [[WHILE_COND3_PREHEADER:%.*]] +; CHECK: while.cond3.preheader.loopexit: +; CHECK-NEXT: [[TMP0:%.*]] = and i64 [[COUNT]], 7 +; CHECK-NEXT: br label [[WHILE_COND3_PREHEADER]] ; CHECK: while.cond3.preheader: -; CHECK-NEXT: [[COUNT_ADDR_0_LCSSA:%.*]] = phi i64 [ [[COUNT]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_BODY]] ] -; CHECK-NEXT: [[SRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[SRC:%.*]], [[ENTRY]] ], [ [[ADD_PTR2:%.*]], [[WHILE_BODY]] ] -; CHECK-NEXT: [[DST_ADDR_0_LCSSA:%.*]] = phi ptr [ [[DST:%.*]], [[ENTRY]] ], [ [[ADD_PTR1:%.*]], [[WHILE_BODY]] ] +; CHECK-NEXT: [[COUNT_ADDR_0_LCSSA:%.*]] = phi i64 [ [[COUNT]], [[ENTRY:%.*]] ], [ [[TMP0]], [[WHILE_COND3_PREHEADER_LOOPEXIT:%.*]] ] +; CHECK-NEXT: [[SRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[SRC:%.*]], [[ENTRY]] ], [ [[ADD_PTR2:%.*]], [[WHILE_COND3_PREHEADER_LOOPEXIT]] ] +; CHECK-NEXT: [[DST_ADDR_0_LCSSA:%.*]] = phi ptr [ [[DST:%.*]], [[ENTRY]] ], [ [[ADD_PTR1:%.*]], [[WHILE_COND3_PREHEADER_LOOPEXIT]] ] ; CHECK-NEXT: [[TOBOOL_NOT27:%.*]] = icmp eq i64 [[COUNT_ADDR_0_LCSSA]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT27]], label [[WHILE_END9:%.*]], label [[WHILE_BODY4:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[DST_ADDR_024:%.*]] = phi ptr [ [[ADD_PTR1]], [[WHILE_BODY]] ], [ [[DST]], [[ENTRY]] ] ; CHECK-NEXT: [[SRC_ADDR_023:%.*]] = phi ptr [ [[ADD_PTR2]], [[WHILE_BODY]] ], [ [[SRC]], [[ENTRY]] ] -; CHECK-NEXT: [[COUNT_ADDR_022:%.*]] = phi i64 [ [[SUB]], [[WHILE_BODY]] ], [ [[COUNT]], [[ENTRY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[SRC_ADDR_023]], align 1 +; CHECK-NEXT: [[COUNT_ADDR_022:%.*]] = phi i64 [ [[SUB:%.*]], [[WHILE_BODY]] ], [ [[COUNT]], [[ENTRY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[SRC_ADDR_023]], align 1 ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[DST_ADDR_024]], i64 [[NEG_OFFS:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[ADD_PTR]], align 1 -; CHECK-NEXT: [[ADD:%.*]] = add <8 x i8> [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[ADD_PTR]], align 1 +; CHECK-NEXT: [[ADD:%.*]] = add <8 x i8> [[TMP2]], [[TMP1]] ; CHECK-NEXT: store <8 x i8> [[ADD]], ptr [[DST_ADDR_024]], align 1 ; CHECK-NEXT: [[ADD_PTR1]] = getelementptr inbounds i8, ptr [[DST_ADDR_024]], i64 8 ; CHECK-NEXT: [[ADD_PTR2]] = getelementptr inbounds i8, ptr [[SRC_ADDR_023]], i64 8 ; CHECK-NEXT: [[SUB]] = add i64 [[COUNT_ADDR_022]], -8 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[SUB]], 7 -; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_COND3_PREHEADER]] +; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_COND3_PREHEADER_LOOPEXIT]] ; CHECK: while.body4: ; CHECK-NEXT: [[DST_ADDR_130:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY4]] ], [ [[DST_ADDR_0_LCSSA]], [[WHILE_COND3_PREHEADER]] ] ; CHECK-NEXT: [[SRC_ADDR_129:%.*]] = phi ptr [ [[INCDEC_PTR8:%.*]], [[WHILE_BODY4]] ], [ [[SRC_ADDR_0_LCSSA]], [[WHILE_COND3_PREHEADER]] ] ; CHECK-NEXT: [[COUNT_ADDR_128:%.*]] = phi i64 [ [[DEC:%.*]], [[WHILE_BODY4]] ], [ [[COUNT_ADDR_0_LCSSA]], [[WHILE_COND3_PREHEADER]] ] ; CHECK-NEXT: [[DEC]] = add i64 [[COUNT_ADDR_128]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[SRC_ADDR_129]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[SRC_ADDR_129]], align 1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST_ADDR_130]], i64 [[NEG_OFFS]] -; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ADD6:%.*]] = add i8 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ADD6:%.*]] = add i8 [[TMP4]], [[TMP3]] ; CHECK-NEXT: store i8 [[ADD6]], ptr [[DST_ADDR_130]], align 1 ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[DST_ADDR_130]], i64 1 ; CHECK-NEXT: [[INCDEC_PTR8]] = getelementptr inbounds i8, ptr [[SRC_ADDR_129]], i64 1 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[DEC]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END9]], label [[WHILE_BODY4]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END9]], label [[WHILE_BODY4]] ; CHECK: while.end9: ; CHECK-NEXT: ret void ;