diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h --- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -315,7 +315,7 @@ bool calcColdCallHeuristics(const BasicBlock *BB); bool calcPointerHeuristics(const BasicBlock *BB); bool calcLoopBranchHeuristics(const BasicBlock *BB, const LoopInfo &LI); - bool calcZeroHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI); + bool calcIntegerHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI); bool calcFloatingPointHeuristics(const BasicBlock *BB); bool calcInvokeHeuristics(const BasicBlock *BB); }; diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp --- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -122,8 +122,8 @@ static const uint32_t PH_TAKEN_WEIGHT = 20; static const uint32_t PH_NONTAKEN_WEIGHT = 12; -static const uint32_t ZH_TAKEN_WEIGHT = 20; -static const uint32_t ZH_NONTAKEN_WEIGHT = 12; +static const uint32_t INTH_TAKEN_WEIGHT = 20; +static const uint32_t INTH_NONTAKEN_WEIGHT = 12; static const uint32_t FPH_TAKEN_WEIGHT = 20; static const uint32_t FPH_NONTAKEN_WEIGHT = 12; @@ -856,7 +856,7 @@ return true; } -bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB, +bool BranchProbabilityInfo::calcIntegerHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI) { const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BI || !BI->isConditional()) return false; @@ -873,10 +873,21 @@ return dyn_cast<ConstantInt>(V); }; + BranchProbability TakenProb(INTH_TAKEN_WEIGHT, + INTH_TAKEN_WEIGHT + INTH_NONTAKEN_WEIGHT); + BranchProbability UntakenProb(INTH_NONTAKEN_WEIGHT, + INTH_TAKEN_WEIGHT + INTH_NONTAKEN_WEIGHT); Value *RHS = CI->getOperand(1); ConstantInt *CV = GetConstantInt(RHS); - if (!CV) - return false; + if (!CV) { + // X == Y -> Unlikely + // Otherwise -> Likely + if 
(CI->isTrueWhenEqual()) + std::swap(TakenProb, UntakenProb); + setEdgeProbability( + BB, SmallVector<BranchProbability, 2>({TakenProb, UntakenProb})); + return true; + } // If the LHS is the result of AND'ing a value with a single bit bitmask, // we don't have information about probabilities. @@ -964,10 +975,6 @@ return false; } - BranchProbability TakenProb(ZH_TAKEN_WEIGHT, - ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); - BranchProbability UntakenProb(ZH_NONTAKEN_WEIGHT, - ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); if (!isProb) std::swap(TakenProb, UntakenProb); @@ -1221,7 +1228,7 @@ continue; if (calcPointerHeuristics(BB)) continue; - if (calcZeroHeuristics(BB, TLI)) + if (calcIntegerHeuristics(BB, TLI)) continue; if (calcFloatingPointHeuristics(BB)) continue; diff --git a/llvm/test/Analysis/BranchProbabilityInfo/zero_heuristics.ll b/llvm/test/Analysis/BranchProbabilityInfo/integer_heuristics.ll rename from llvm/test/Analysis/BranchProbabilityInfo/zero_heuristics.ll rename to llvm/test/Analysis/BranchProbabilityInfo/integer_heuristics.ll --- a/llvm/test/Analysis/BranchProbabilityInfo/zero_heuristics.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/integer_heuristics.ll @@ -101,3 +101,54 @@ exit: ret void } + +declare void @foo() + +; CHECK-LABEL: foo1 +define i32 @foo1(i32 %x, i32 %y, i8 signext %z, i8 signext %w) { +entry: + %c = icmp eq i32 %x, %y + br i1 %c, label %then, label %else +; CHECK: edge entry -> then probability is 0x30000000 / 0x80000000 = 37.50% +; CHECK: edge entry -> else probability is 0x50000000 / 0x80000000 = 62.50% +then: + tail call void @foo() + br label %else +; CHECK: edge then -> else probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +else: + %v = phi i8 [ %z, %then ], [ %w, %entry ] + %r = sext i8 %v to i32 + ret i32 %r +} + +; CHECK-LABEL: foo2 +define i32 @foo2(i32 %x, i32 %y, i8 signext %z, i8 signext %w) { +entry: + %c = icmp ne i32 %x, %y + br i1 %c, label %then, label %else +; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50% 
+; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50% +then: + br label %else +; CHECK: edge then -> else probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +else: + %v = phi i8 [ %z, %then ], [ %w, %entry ] + %r = sext i8 %v to i32 + ret i32 %r +} + +; CHECK-LABEL: foo3 +define i32 @foo3(i32 %x, i32 %y, i8 signext %z, i8 signext %w) { +entry: + %c = icmp ult i32 %x, %y + br i1 %c, label %then, label %else +; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50% +; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50% +then: + br label %else +; CHECK: edge then -> else probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +else: + %v = phi i8 [ %z, %then ], [ %w, %entry ] + %r = sext i8 %v to i32 + ret i32 %r +} diff --git a/llvm/test/Analysis/BranchProbabilityInfo/loop.ll b/llvm/test/Analysis/BranchProbabilityInfo/loop.ll --- a/llvm/test/Analysis/BranchProbabilityInfo/loop.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/loop.ll @@ -263,8 +263,8 @@ %0 = load i32, i32* %c, align 4 %cmp1 = icmp eq i32 %0, %i.011 br i1 %cmp1, label %for.inc5, label %if.end -; CHECK: edge for.body -> for.inc5 probability is 0x40000000 / 0x80000000 = 50.00% -; CHECK: edge for.body -> if.end probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge for.body -> for.inc5 probability is 0x30000000 / 0x80000000 = 37.50% +; CHECK: edge for.body -> if.end probability is 0x50000000 / 0x80000000 = 62.50% if.end: call void @g1() @@ -324,22 +324,22 @@ %0 = load i32, i32* %c, align 4 %cmp4 = icmp eq i32 %0, %j.017 br i1 %cmp4, label %for.inc, label %if.end -; CHECK: edge for.body3 -> for.inc probability is 0x40000000 / 0x80000000 = 50.00% -; CHECK: edge for.body3 -> if.end probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge for.body3 -> for.inc probability is 0x30000000 / 0x80000000 = 37.50% +; CHECK: edge for.body3 -> if.end probability is 0x50000000 / 0x80000000 = 62.50% if.end: %1 = load i32, i32* 
%arrayidx5, align 4 %cmp6 = icmp eq i32 %1, %j.017 br i1 %cmp6, label %for.inc, label %if.end8 -; CHECK: edge if.end -> for.inc probability is 0x40000000 / 0x80000000 = 50.00% -; CHECK: edge if.end -> if.end8 probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge if.end -> for.inc probability is 0x30000000 / 0x80000000 = 37.50% +; CHECK: edge if.end -> if.end8 probability is 0x50000000 / 0x80000000 = 62.50% if.end8: %2 = load i32, i32* %arrayidx9, align 4 %cmp10 = icmp eq i32 %2, %j.017 br i1 %cmp10, label %for.inc, label %if.end12 -; CHECK: edge if.end8 -> for.inc probability is 0x40000000 / 0x80000000 = 50.00% -; CHECK: edge if.end8 -> if.end12 probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge if.end8 -> for.inc probability is 0x30000000 / 0x80000000 = 37.50% +; CHECK: edge if.end8 -> if.end12 probability is 0x50000000 / 0x80000000 = 62.50% if.end12: call void @g2() diff --git a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll --- a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll @@ -88,7 +88,6 @@ ; CHECK-LABEL: test_GEP_across_BB: ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #528] ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #532] -; CHECK-NOT: add ; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #532] ; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #528] diff --git a/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll b/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll --- a/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll +++ b/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll @@ -1,19 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-apple-darwin -aarch64-bcc-offset-bits=4 -align-all-nofallthru-blocks=4 < %s | FileCheck %s ; Long branch is assumed because the block has a higher alignment ; requirement than the function. 
-; CHECK-LABEL: invert_bcc_block_align_higher_func: -; CHECK: b.eq [[JUMP_BB1:LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: b [[JUMP_BB2:LBB[0-9]+_[0-9]+]] - -; CHECK: [[JUMP_BB1]]: -; CHECK: ret -; CHECK: .p2align 4 - -; CHECK: [[JUMP_BB2]]: -; CHECK: ret define i32 @invert_bcc_block_align_higher_func(i32 %x, i32 %y) align 4 #0 { +; CHECK-LABEL: invert_bcc_block_align_higher_func: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.ne LBB0_1 +; CHECK-NEXT: b LBB0_2 +; CHECK-NEXT: LBB0_1: ; %bb2 +; CHECK-NEXT: mov w8, #9 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: str w8, [x8] +; CHECK-NEXT: ret +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: LBB0_2: ; %bb1 +; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: str w8, [x8] +; CHECK-NEXT: ret %1 = icmp eq i32 %x, %y br i1 %1, label %bb1, label %bb2 diff --git a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll --- a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll +++ b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -debugify-and-strip-all-safe < %s -mtriple=aarch64-linux-gnu | FileCheck %s ; marked as external to prevent possible optimizations @@ -8,12 +9,34 @@ ; (a > 10 && b == c) || (a >= 10 && b == d) define i32 @combine_gt_ge_10() #0 { -; CHECK-LABEL: combine_gt_ge_10 -; CHECK: cmp -; CHECK: b.le -; CHECK: ret -; CHECK-NOT: cmp -; CHECK: b.lt +; CHECK-LABEL: combine_gt_ge_10: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, a +; CHECK-NEXT: ldr w8, [x8, :lo12:a] +; CHECK-NEXT: cmp w8, #10 // =10 +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: b.le .LBB0_2 +; CHECK-NEXT: // %bb.1: // %land.lhs.true +; CHECK-NEXT: adrp x10, c +; CHECK-NEXT: ldr w9, [x8, :lo12:b] +; CHECK-NEXT: ldr w10, [x10, :lo12:c] +; CHECK-NEXT: cmp w9, w10 +; CHECK-NEXT: b.ne .LBB0_3 +; CHECK-NEXT: b .LBB0_5 +; CHECK-NEXT: .LBB0_2: // %lor.lhs.false +; 
CHECK-NEXT: b.lt .LBB0_4 +; CHECK-NEXT: .LBB0_3: // %land.lhs.true3 +; CHECK-NEXT: adrp x9, d +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:d] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB0_5 +; CHECK-NEXT: .LBB0_4: // %if.end +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_5: +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp sgt i32 %0, 10 @@ -45,12 +68,35 @@ ; (a > 5 && b == c) || (a < 5 && b == d) define i32 @combine_gt_lt_5() #0 { -; CHECK-LABEL: combine_gt_lt_5 -; CHECK: cmp -; CHECK: b.le -; CHECK: ret -; CHECK-NOT: cmp -; CHECK: b.ge +; CHECK-LABEL: combine_gt_lt_5: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, a +; CHECK-NEXT: ldr w8, [x8, :lo12:a] +; CHECK-NEXT: cmp w8, #5 // =5 +; CHECK-NEXT: b.le .LBB1_2 +; CHECK-NEXT: // %bb.1: // %land.lhs.true +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, c +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:c] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne .LBB1_4 +; CHECK-NEXT: b .LBB1_5 +; CHECK-NEXT: .LBB1_2: // %lor.lhs.false +; CHECK-NEXT: b.ge .LBB1_4 +; CHECK-NEXT: // %bb.3: // %land.lhs.true3 +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, d +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:d] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB1_5 +; CHECK-NEXT: .LBB1_4: // %if.end +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_5: +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp sgt i32 %0, 5 @@ -82,12 +128,34 @@ ; (a < 5 && b == c) || (a <= 5 && b == d) define i32 @combine_lt_ge_5() #0 { -; CHECK-LABEL: combine_lt_ge_5 -; CHECK: cmp -; CHECK: b.ge -; CHECK: ret -; CHECK-NOT: cmp -; CHECK: b.gt +; CHECK-LABEL: combine_lt_ge_5: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, a +; CHECK-NEXT: ldr w8, [x8, :lo12:a] +; CHECK-NEXT: cmp w8, #5 // =5 +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: b.ge 
.LBB2_2 +; CHECK-NEXT: // %bb.1: // %land.lhs.true +; CHECK-NEXT: adrp x10, c +; CHECK-NEXT: ldr w9, [x8, :lo12:b] +; CHECK-NEXT: ldr w10, [x10, :lo12:c] +; CHECK-NEXT: cmp w9, w10 +; CHECK-NEXT: b.ne .LBB2_3 +; CHECK-NEXT: b .LBB2_5 +; CHECK-NEXT: .LBB2_2: // %lor.lhs.false +; CHECK-NEXT: b.gt .LBB2_4 +; CHECK-NEXT: .LBB2_3: // %land.lhs.true3 +; CHECK-NEXT: adrp x9, d +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:d] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB2_5 +; CHECK-NEXT: .LBB2_4: // %if.end +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_5: +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp slt i32 %0, 5 @@ -119,12 +187,35 @@ ; (a < 5 && b == c) || (a > 5 && b == d) define i32 @combine_lt_gt_5() #0 { -; CHECK-LABEL: combine_lt_gt_5 -; CHECK: cmp -; CHECK: b.ge -; CHECK: ret -; CHECK-NOT: cmp -; CHECK: b.le +; CHECK-LABEL: combine_lt_gt_5: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, a +; CHECK-NEXT: ldr w8, [x8, :lo12:a] +; CHECK-NEXT: cmp w8, #5 // =5 +; CHECK-NEXT: b.ge .LBB3_2 +; CHECK-NEXT: // %bb.1: // %land.lhs.true +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, c +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:c] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne .LBB3_4 +; CHECK-NEXT: b .LBB3_5 +; CHECK-NEXT: .LBB3_2: // %lor.lhs.false +; CHECK-NEXT: b.le .LBB3_4 +; CHECK-NEXT: // %bb.3: // %land.lhs.true3 +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, d +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:d] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB3_5 +; CHECK-NEXT: .LBB3_4: // %if.end +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_5: +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp slt i32 %0, 5 @@ -156,12 +247,35 @@ ; (a > -5 && b == c) || (a < -5 && b == d) define i32 @combine_gt_lt_n5() #0 { -; CHECK-LABEL: combine_gt_lt_n5 
-; CHECK: cmn -; CHECK: b.le -; CHECK: ret -; CHECK-NOT: cmn -; CHECK: b.ge +; CHECK-LABEL: combine_gt_lt_n5: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, a +; CHECK-NEXT: ldr w8, [x8, :lo12:a] +; CHECK-NEXT: cmn w8, #5 // =5 +; CHECK-NEXT: b.le .LBB4_2 +; CHECK-NEXT: // %bb.1: // %land.lhs.true +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, c +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:c] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne .LBB4_4 +; CHECK-NEXT: b .LBB4_5 +; CHECK-NEXT: .LBB4_2: // %lor.lhs.false +; CHECK-NEXT: b.ge .LBB4_4 +; CHECK-NEXT: // %bb.3: // %land.lhs.true3 +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, d +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:d] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB4_5 +; CHECK-NEXT: .LBB4_4: // %if.end +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB4_5: +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp sgt i32 %0, -5 @@ -193,12 +307,35 @@ ; (a < -5 && b == c) || (a > -5 && b == d) define i32 @combine_lt_gt_n5() #0 { -; CHECK-LABEL: combine_lt_gt_n5 -; CHECK: cmn -; CHECK: b.ge -; CHECK: ret -; CHECK-NOT: cmn -; CHECK: b.le +; CHECK-LABEL: combine_lt_gt_n5: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, a +; CHECK-NEXT: ldr w8, [x8, :lo12:a] +; CHECK-NEXT: cmn w8, #5 // =5 +; CHECK-NEXT: b.ge .LBB5_2 +; CHECK-NEXT: // %bb.1: // %land.lhs.true +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, c +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:c] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.ne .LBB5_4 +; CHECK-NEXT: b .LBB5_5 +; CHECK-NEXT: .LBB5_2: // %lor.lhs.false +; CHECK-NEXT: b.le .LBB5_4 +; CHECK-NEXT: // %bb.3: // %land.lhs.true3 +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, d +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:d] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB5_5 +; CHECK-NEXT: .LBB5_4: // %if.end +; 
CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_5: +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp slt i32 %0, -5 @@ -236,6 +373,38 @@ ; no checks for this case, it just should be processed without errors define void @combine_non_adjacent_cmp_br(%struct.Struct* nocapture readonly %hdCall) #0 { +; CHECK-LABEL: combine_non_adjacent_cmp_br: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset w30, -48 +; CHECK-NEXT: ldr x19, [x0] +; CHECK-NEXT: mov w20, #24 +; CHECK-NEXT: adrp x22, glob +; CHECK-NEXT: add x21, x19, #2 // =2 +; CHECK-NEXT: .LBB6_1: // %land.rhs +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldr x8, [x20] +; CHECK-NEXT: cmp x8, #1 // =1 +; CHECK-NEXT: b.lt .LBB6_3 +; CHECK-NEXT: // %bb.2: // %while.body +; CHECK-NEXT: // in Loop: Header=BB6_1 Depth=1 +; CHECK-NEXT: ldr x0, [x22, :lo12:glob] +; CHECK-NEXT: bl Update +; CHECK-NEXT: sub x21, x21, #2 // =2 +; CHECK-NEXT: cmp x19, x21 +; CHECK-NEXT: b.lt .LBB6_1 +; CHECK-NEXT: .LBB6_3: // %while.end +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret entry: %size = getelementptr inbounds %struct.Struct, %struct.Struct* %hdCall, i64 0, i32 0 %0 = load i64, i64* %size, align 8 @@ -262,11 +431,49 @@ declare void @do_something() #1 define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 { -; CHECK-LABEL: do_nothing_if_resultant_opcodes_would_differ -; CHECK: cmn -; CHECK: b.gt -; CHECK: cmp -; CHECK: b.gt +; 
CHECK-LABEL: do_nothing_if_resultant_opcodes_would_differ: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: adrp x19, a +; CHECK-NEXT: ldr w8, [x19, :lo12:a] +; CHECK-NEXT: cmn w8, #2 // =2 +; CHECK-NEXT: b.le .LBB7_2 +; CHECK-NEXT: // %bb.1: // %while.end +; CHECK-NEXT: cmp w8, #1 // =1 +; CHECK-NEXT: b.le .LBB7_5 +; CHECK-NEXT: b .LBB7_6 +; CHECK-NEXT: .LBB7_2: // %while.body.preheader +; CHECK-NEXT: sub w20, w8, #1 // =1 +; CHECK-NEXT: .LBB7_3: // %while.body +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: bl do_something +; CHECK-NEXT: adds w20, w20, #1 // =1 +; CHECK-NEXT: b.mi .LBB7_3 +; CHECK-NEXT: // %bb.4: // %while.cond.while.end_crit_edge +; CHECK-NEXT: ldr w8, [x19, :lo12:a] +; CHECK-NEXT: cmp w8, #1 // =1 +; CHECK-NEXT: b.gt .LBB7_6 +; CHECK-NEXT: .LBB7_5: // %land.lhs.true +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, d +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:d] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB7_7 +; CHECK-NEXT: .LBB7_6: // %if.end +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: b .LBB7_8 +; CHECK-NEXT: .LBB7_7: +; CHECK-NEXT: mov w0, #123 +; CHECK-NEXT: .LBB7_8: // %return +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 %cmp4 = icmp slt i32 %0, -1 @@ -306,11 +513,43 @@ } define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 { -; CHECK-LABEL: do_nothing_if_compares_can_not_be_adjusted_to_each_other -; CHECK: cmp -; CHECK: b.gt -; CHECK: cmn -; CHECK: b.lt +; CHECK-LABEL: do_nothing_if_compares_can_not_be_adjusted_to_each_other: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp 
x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: adrp x8, a +; CHECK-NEXT: ldr w8, [x8, :lo12:a] +; CHECK-NEXT: cmp w8, #0 // =0 +; CHECK-NEXT: b.gt .LBB8_3 +; CHECK-NEXT: // %bb.1: // %while.body.preheader +; CHECK-NEXT: sub w19, w8, #1 // =1 +; CHECK-NEXT: .LBB8_2: // %while.body +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: bl do_something +; CHECK-NEXT: adds w19, w19, #1 // =1 +; CHECK-NEXT: b.mi .LBB8_2 +; CHECK-NEXT: .LBB8_3: // %while.end +; CHECK-NEXT: adrp x8, c +; CHECK-NEXT: ldr w8, [x8, :lo12:c] +; CHECK-NEXT: cmn w8, #2 // =2 +; CHECK-NEXT: b.lt .LBB8_5 +; CHECK-NEXT: // %bb.4: // %land.lhs.true +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, d +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:d] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB8_6 +; CHECK-NEXT: .LBB8_5: // %if.end +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB8_6: +; CHECK-NEXT: mov w0, #123 +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 %cmp4 = icmp slt i32 %0, 1 @@ -356,19 +595,44 @@ ; b.gt .LBB0_5 define i32 @fcmpri(i32 %argc, i8** nocapture readonly %argv) { - ; CHECK-LABEL: fcmpri: -; CHECK: cmp w0, #2 -; CHECK: b.lt .LBB9_3 -; CHECK-NOT: cmp w0, #1 -; CHECK-NOT: b.le .LBB9_3 - -; CHECK-LABEL-DAG: .LBB9_3 -; CHECK: cmp w19, #0 -; CHECK: fcmp d8, #0.0 -; CHECK-NOT: cmp w19, #1 -; CHECK-NOT: b.ge .LBB9_5 - +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d8, [sp, #-32]! 
// 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: .cfi_offset b8, -32 +; CHECK-NEXT: cmp w0, #2 // =2 +; CHECK-NEXT: b.lt .LBB9_3 +; CHECK-NEXT: // %bb.1: // %land.lhs.true +; CHECK-NEXT: ldr x8, [x1, #8] +; CHECK-NEXT: cbz x8, .LBB9_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, #3 +; CHECK-NEXT: b .LBB9_4 +; CHECK-NEXT: .LBB9_3: // %if.end +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: bl zoo +; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: mov w0, #-1 +; CHECK-NEXT: bl yoo +; CHECK-NEXT: cmp w19, #0 // =0 +; CHECK-NEXT: cinc w0, w19, gt +; CHECK-NEXT: mov w1, #2 +; CHECK-NEXT: mov v8.16b, v0.16b +; CHECK-NEXT: bl xoo +; CHECK-NEXT: fmov d0, #-1.00000000 +; CHECK-NEXT: fadd d0, d8, d0 +; CHECK-NEXT: fcmp d8, #0.0 +; CHECK-NEXT: fcsel d0, d8, d0, gt +; CHECK-NEXT: fmov d1, #-2.00000000 +; CHECK-NEXT: bl woo +; CHECK-NEXT: mov w0, #4 +; CHECK-NEXT: .LBB9_4: // %return +; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret entry: %cmp = icmp sgt i32 %argc, 1 br i1 %cmp, label %land.lhs.true, label %if.end @@ -405,10 +669,27 @@ define void @cmp_shifted(i32 %in, i32 %lhs, i32 %rhs) { ; CHECK-LABEL: cmp_shifted: -; CHECK: cmp w0, #2, lsl #12 -; [...] -; CHECK: cmp w0, #1 - +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: cmp w0, #2, lsl #12 // =8192 +; CHECK-NEXT: b.lt .LBB10_2 +; CHECK-NEXT: // %bb.1: // %true +; CHECK-NEXT: mov w0, #128 +; CHECK-NEXT: b .LBB10_5 +; CHECK-NEXT: .LBB10_2: // %false +; CHECK-NEXT: cmp w0, #1 // =1 +; CHECK-NEXT: b.lt .LBB10_4 +; CHECK-NEXT: // %bb.3: // %truer +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: b .LBB10_5 +; CHECK-NEXT: .LBB10_4: // %falser +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: .LBB10_5: // %true +; CHECK-NEXT: bl zoo +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %tst_low = icmp sgt i32 %in, 8191 br i1 %tst_low, label %true, label %false @@ -430,10 +711,38 @@ } define i32 @combine_gt_ge_sel(i64 %v, i64* %p) #0 { -; CHECK-LABEL: combine_gt_ge_sel -; CHECK: ldr [[reg1:w[0-9]*]], -; CHECK: cmp [[reg1]], #0 -; CHECK: csel {{.*}}, gt +; CHECK-LABEL: combine_gt_ge_sel: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, a +; CHECK-NEXT: ldr w8, [x8, :lo12:a] +; CHECK-NEXT: cmp w8, #0 // =0 +; CHECK-NEXT: csel x9, x0, xzr, gt +; CHECK-NEXT: str x9, [x1] +; CHECK-NEXT: b.le .LBB11_2 +; CHECK-NEXT: // %bb.1: // %lor.lhs.false +; CHECK-NEXT: cmp w8, #2 // =2 +; CHECK-NEXT: b.ge .LBB11_3 +; CHECK-NEXT: b .LBB11_4 +; CHECK-NEXT: .LBB11_2: // %land.lhs.true +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, c +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:c] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB11_5 +; CHECK-NEXT: .LBB11_3: // %land.lhs.true3 +; CHECK-NEXT: adrp x8, b +; CHECK-NEXT: adrp x9, d +; CHECK-NEXT: ldr w8, [x8, :lo12:b] +; CHECK-NEXT: ldr w9, [x9, :lo12:d] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: b.eq .LBB11_5 +; CHECK-NEXT: .LBB11_4: // %if.end +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB11_5: +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret entry: %0 = load i32, i32* @a, align 4 %cmp = icmp sgt i32 %0, 0 diff --git 
a/llvm/test/CodeGen/AArch64/cond-br-tuning.ll b/llvm/test/CodeGen/AArch64/cond-br-tuning.ll --- a/llvm/test/CodeGen/AArch64/cond-br-tuning.ll +++ b/llvm/test/CodeGen/AArch64/cond-br-tuning.ll @@ -1,14 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -debugify-and-strip-all-safe < %s -O3 -mtriple=aarch64-eabi -verify-machineinstrs | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-linaro-linux-gnueabi" ; CMN is an alias of ADDS. -; CHECK-LABEL: test_add_cbz: -; CHECK: cmn w0, w1 -; CHECK: b.eq -; CHECK: ret define void @test_add_cbz(i32 %a, i32 %b, i32* %ptr) { +; CHECK-LABEL: test_add_cbz: +; CHECK: // %bb.0: +; CHECK-NEXT: cmn w0, w1 +; CHECK-NEXT: b.eq .LBB0_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str wzr, [x2] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: // %L2 +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: ret %c = add nsw i32 %a, %b %d = icmp ne i32 %c, 0 br i1 %d, label %L1, label %L2 @@ -20,11 +28,17 @@ ret void } -; CHECK-LABEL: test_add_cbz_multiple_use: -; CHECK: adds -; CHECK: b.eq -; CHECK: ret define void @test_add_cbz_multiple_use(i32 %a, i32 %b, i32* %ptr) { +; CHECK-LABEL: test_add_cbz_multiple_use: +; CHECK: // %bb.0: +; CHECK-NEXT: adds w8, w0, w1 +; CHECK-NEXT: b.eq .LBB1_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str wzr, [x2] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_2: // %L2 +; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: ret %c = add nsw i32 %a, %b %d = icmp ne i32 %c, 0 br i1 %d, label %L1, label %L2 @@ -36,10 +50,18 @@ ret void } -; CHECK-LABEL: test_add_cbz_64: -; CHECK: cmn x0, x1 -; CHECK: b.eq define void @test_add_cbz_64(i64 %a, i64 %b, i64* %ptr) { +; CHECK-LABEL: test_add_cbz_64: +; CHECK: // %bb.0: +; CHECK-NEXT: cmn x0, x1 +; CHECK-NEXT: b.eq .LBB2_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str xzr, [x2] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_2: // %L2 +; CHECK-NEXT: mov w8, #1 +; 
CHECK-NEXT: str x8, [x2] +; CHECK-NEXT: ret %c = add nsw i64 %a, %b %d = icmp ne i64 %c, 0 br i1 %d, label %L1, label %L2 @@ -51,10 +73,18 @@ ret void } -; CHECK-LABEL: test_and_cbz: -; CHECK: tst w0, #0x6 -; CHECK: b.eq define void @test_and_cbz(i32 %a, i32* %ptr) { +; CHECK-LABEL: test_and_cbz: +; CHECK: // %bb.0: +; CHECK-NEXT: tst w0, #0x6 +; CHECK-NEXT: b.eq .LBB3_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str wzr, [x1] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_2: // %L2 +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: str w8, [x1] +; CHECK-NEXT: ret %c = and i32 %a, 6 %d = icmp ne i32 %c, 0 br i1 %d, label %L1, label %L2 @@ -66,10 +96,18 @@ ret void } -; CHECK-LABEL: test_bic_cbnz: -; CHECK: bics wzr, w1, w0 -; CHECK: b.ne define void @test_bic_cbnz(i32 %a, i32 %b, i32* %ptr) { +; CHECK-LABEL: test_bic_cbnz: +; CHECK: // %bb.0: +; CHECK-NEXT: bics wzr, w1, w0 +; CHECK-NEXT: b.eq .LBB4_2 +; CHECK-NEXT: // %bb.1: // %L2 +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB4_2: // %L1 +; CHECK-NEXT: str wzr, [x2] +; CHECK-NEXT: ret %c = and i32 %a, %b %d = icmp eq i32 %c, %b br i1 %d, label %L1, label %L2 @@ -81,11 +119,15 @@ ret void } -; CHECK-LABEL: test_add_tbz: -; CHECK: adds -; CHECK: b.pl -; CHECK: ret define void @test_add_tbz(i32 %a, i32 %b, i32* %ptr) { +; CHECK-LABEL: test_add_tbz: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adds w8, w0, w1 +; CHECK-NEXT: b.pl .LBB5_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: .LBB5_2: // %L2 +; CHECK-NEXT: ret entry: %add = add nsw i32 %a, %b %cmp36 = icmp sge i32 %add, 0 @@ -97,11 +139,15 @@ ret void } -; CHECK-LABEL: test_subs_tbz: -; CHECK: subs -; CHECK: b.pl -; CHECK: ret define void @test_subs_tbz(i32 %a, i32 %b, i32* %ptr) { +; CHECK-LABEL: test_subs_tbz: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: b.pl .LBB6_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: .LBB6_2: // %L2 +; 
CHECK-NEXT: ret entry: %sub = sub nsw i32 %a, %b %cmp36 = icmp sge i32 %sub, 0 @@ -113,11 +159,15 @@ ret void } -; CHECK-LABEL: test_add_tbnz -; CHECK: adds -; CHECK: b.mi -; CHECK: ret define void @test_add_tbnz(i32 %a, i32 %b, i32* %ptr) { +; CHECK-LABEL: test_add_tbnz: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adds w8, w0, w1 +; CHECK-NEXT: b.mi .LBB7_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: .LBB7_2: // %L2 +; CHECK-NEXT: ret entry: %add = add nsw i32 %a, %b %cmp36 = icmp slt i32 %add, 0 @@ -129,11 +179,15 @@ ret void } -; CHECK-LABEL: test_subs_tbnz -; CHECK: subs -; CHECK: b.mi -; CHECK: ret define void @test_subs_tbnz(i32 %a, i32 %b, i32* %ptr) { +; CHECK-LABEL: test_subs_tbnz: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: b.mi .LBB8_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: .LBB8_2: // %L2 +; CHECK-NEXT: ret entry: %sub = sub nsw i32 %a, %b %cmp36 = icmp slt i32 %sub, 0 @@ -149,11 +203,22 @@ declare void @bar(i32) ; Don't transform since the call will clobber the NZCV bits. -; CHECK-LABEL: test_call_clobber: -; CHECK: and w[[DST:[0-9]+]], w1, #0x6 -; CHECK: bl bar -; CHECK: cbnz w[[DST]] define void @test_call_clobber(i32 %unused, i32 %a) { +; CHECK-LABEL: test_call_clobber: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp x30, x19, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: and w19, w1, #0x6 +; CHECK-NEXT: mov w0, w19 +; CHECK-NEXT: bl bar +; CHECK-NEXT: cbnz w19, .LBB9_2 +; CHECK-NEXT: // %bb.1: // %if.end +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB9_2: // %if.then +; CHECK-NEXT: bl foo entry: %c = and i32 %a, 6 call void @bar(i32 %c) diff --git a/llvm/test/CodeGen/AArch64/fast-isel-cmp-branch.ll b/llvm/test/CodeGen/AArch64/fast-isel-cmp-branch.ll --- a/llvm/test/CodeGen/AArch64/fast-isel-cmp-branch.ll +++ b/llvm/test/CodeGen/AArch64/fast-isel-cmp-branch.ll @@ -1,10 +1,18 @@ -; RUN: llc -aarch64-enable-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s -; RUN: llc -fast-isel -fast-isel-abort=1 -aarch64-enable-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -aarch64-enable-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s --check-prefixes=CHECK,NOFASTISEL +; RUN: llc -fast-isel -fast-isel-abort=1 -aarch64-enable-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s --check-prefixes=CHECK,FASTISEL define i32 @fcmp_oeq(float %x, float %y) { -; CHECK-LABEL: fcmp_oeq -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.ne {{LBB.+_2}} +; CHECK-LABEL: fcmp_oeq: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.ne LBB0_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB0_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp oeq float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -14,9 +22,16 @@ } define i32 @fcmp_ogt(float %x, float %y) { -; CHECK-LABEL: fcmp_ogt -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.le {{LBB.+_2}} +; CHECK-LABEL: fcmp_ogt: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.le LBB1_2 +; 
CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB1_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp ogt float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -26,9 +41,16 @@ } define i32 @fcmp_oge(float %x, float %y) { -; CHECK-LABEL: fcmp_oge -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.lt {{LBB.+_2}} +; CHECK-LABEL: fcmp_oge: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.lt LBB2_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB2_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp oge float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -38,9 +60,16 @@ } define i32 @fcmp_olt(float %x, float %y) { -; CHECK-LABEL: fcmp_olt -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.pl {{LBB.+_2}} +; CHECK-LABEL: fcmp_olt: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.pl LBB3_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB3_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp olt float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -50,9 +79,16 @@ } define i32 @fcmp_ole(float %x, float %y) { -; CHECK-LABEL: fcmp_ole -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.hi {{LBB.+_2}} +; CHECK-LABEL: fcmp_ole: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.hi LBB4_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB4_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp ole float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -62,10 +98,30 @@ } define i32 @fcmp_one(float %x, float %y) { -; CHECK-LABEL: fcmp_one -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.mi -; CHECK-NEXT: b.gt +; NOFASTISEL-LABEL: fcmp_one: +; NOFASTISEL: ; %bb.0: +; NOFASTISEL-NEXT: fcmp s0, s1 +; NOFASTISEL-NEXT: b.mi LBB5_1 +; NOFASTISEL-NEXT: b.gt LBB5_1 +; NOFASTISEL-NEXT: b LBB5_2 +; NOFASTISEL-NEXT: LBB5_1: ; %bb1 +; NOFASTISEL-NEXT: mov w0, wzr +; 
NOFASTISEL-NEXT: ret +; NOFASTISEL-NEXT: LBB5_2: ; %bb2 +; NOFASTISEL-NEXT: mov w0, #1 +; NOFASTISEL-NEXT: ret +; +; FASTISEL-LABEL: fcmp_one: +; FASTISEL: ; %bb.0: +; FASTISEL-NEXT: fcmp s0, s1 +; FASTISEL-NEXT: b.mi LBB5_2 +; FASTISEL-NEXT: b.gt LBB5_2 +; FASTISEL-NEXT: ; %bb.1: ; %bb2 +; FASTISEL-NEXT: mov w0, #1 +; FASTISEL-NEXT: ret +; FASTISEL-NEXT: LBB5_2: ; %bb1 +; FASTISEL-NEXT: mov w0, wzr +; FASTISEL-NEXT: ret %1 = fcmp one float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -75,9 +131,16 @@ } define i32 @fcmp_ord(float %x, float %y) { -; CHECK-LABEL: fcmp_ord -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.vs {{LBB.+_2}} +; CHECK-LABEL: fcmp_ord: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.vs LBB6_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB6_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp ord float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -87,9 +150,16 @@ } define i32 @fcmp_uno(float %x, float %y) { -; CHECK-LABEL: fcmp_uno -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.vs {{LBB.+_2}} +; CHECK-LABEL: fcmp_uno: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.vs LBB7_2 +; CHECK-NEXT: ; %bb.1: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB7_2: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret %1 = fcmp uno float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -99,10 +169,30 @@ } define i32 @fcmp_ueq(float %x, float %y) { -; CHECK-LABEL: fcmp_ueq -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.eq {{LBB.+_2}} -; CHECK-NEXT: b.vs {{LBB.+_2}} +; NOFASTISEL-LABEL: fcmp_ueq: +; NOFASTISEL: ; %bb.0: +; NOFASTISEL-NEXT: fcmp s0, s1 +; NOFASTISEL-NEXT: b.eq LBB8_2 +; NOFASTISEL-NEXT: b.vs LBB8_2 +; NOFASTISEL-NEXT: b LBB8_1 +; NOFASTISEL-NEXT: LBB8_1: ; %bb2 +; NOFASTISEL-NEXT: mov w0, #1 +; NOFASTISEL-NEXT: ret +; NOFASTISEL-NEXT: LBB8_2: ; %bb1 +; NOFASTISEL-NEXT: mov w0, wzr +; NOFASTISEL-NEXT: ret +; +; FASTISEL-LABEL: fcmp_ueq: +; FASTISEL: ; 
%bb.0: +; FASTISEL-NEXT: fcmp s0, s1 +; FASTISEL-NEXT: b.eq LBB8_2 +; FASTISEL-NEXT: b.vs LBB8_2 +; FASTISEL-NEXT: ; %bb.1: ; %bb2 +; FASTISEL-NEXT: mov w0, #1 +; FASTISEL-NEXT: ret +; FASTISEL-NEXT: LBB8_2: ; %bb1 +; FASTISEL-NEXT: mov w0, wzr +; FASTISEL-NEXT: ret %1 = fcmp ueq float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -112,9 +202,16 @@ } define i32 @fcmp_ugt(float %x, float %y) { -; CHECK-LABEL: fcmp_ugt -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.ls {{LBB.+_2}} +; CHECK-LABEL: fcmp_ugt: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.ls LBB9_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB9_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp ugt float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -124,9 +221,16 @@ } define i32 @fcmp_uge(float %x, float %y) { -; CHECK-LABEL: fcmp_uge -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.mi {{LBB.+_2}} +; CHECK-LABEL: fcmp_uge: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.mi LBB10_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB10_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp uge float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -136,9 +240,16 @@ } define i32 @fcmp_ult(float %x, float %y) { -; CHECK-LABEL: fcmp_ult -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.ge {{LBB.+_2}} +; CHECK-LABEL: fcmp_ult: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.ge LBB11_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB11_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp ult float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -148,9 +259,16 @@ } define i32 @fcmp_ule(float %x, float %y) { -; CHECK-LABEL: fcmp_ule -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.gt {{LBB.+_2}} +; CHECK-LABEL: fcmp_ule: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.gt LBB12_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; 
CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB12_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp ule float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -160,9 +278,16 @@ } define i32 @fcmp_une(float %x, float %y) { -; CHECK-LABEL: fcmp_une -; CHECK: fcmp s0, s1 -; CHECK-NEXT: b.eq {{LBB.+_2}} +; CHECK-LABEL: fcmp_une: +; CHECK: ; %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: b.eq LBB13_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB13_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = fcmp une float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -172,9 +297,16 @@ } define i32 @icmp_eq(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_eq -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.ne {{LBB.+_2}} +; CHECK-LABEL: icmp_eq: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.eq LBB14_2 +; CHECK-NEXT: ; %bb.1: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB14_2: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret %1 = icmp eq i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -184,9 +316,16 @@ } define i32 @icmp_ne(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_ne -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.eq {{LBB.+_2}} +; CHECK-LABEL: icmp_ne: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.eq LBB15_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB15_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = icmp ne i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -196,9 +335,16 @@ } define i32 @icmp_ugt(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_ugt -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.ls {{LBB.+_2}} +; CHECK-LABEL: icmp_ugt: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.ls LBB16_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB16_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = icmp ugt i32 %x, %y br i1 %1, label %bb1, label %bb2 
bb2: @@ -208,9 +354,16 @@ } define i32 @icmp_uge(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_uge -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.lo {{LBB.+_2}} +; CHECK-LABEL: icmp_uge: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.hs LBB17_2 +; CHECK-NEXT: ; %bb.1: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB17_2: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret %1 = icmp uge i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -220,9 +373,16 @@ } define i32 @icmp_ult(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_ult -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.hs {{LBB.+_2}} +; CHECK-LABEL: icmp_ult: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.hs LBB18_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB18_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = icmp ult i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -232,9 +392,16 @@ } define i32 @icmp_ule(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_ule -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.hi {{LBB.+_2}} +; CHECK-LABEL: icmp_ule: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.ls LBB19_2 +; CHECK-NEXT: ; %bb.1: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB19_2: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret %1 = icmp ule i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -244,9 +411,16 @@ } define i32 @icmp_sgt(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_sgt -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.le {{LBB.+_2}} +; CHECK-LABEL: icmp_sgt: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.le LBB20_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB20_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = icmp sgt i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -256,9 +430,16 @@ } define i32 @icmp_sge(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_sge -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.lt {{LBB.+_2}} +; CHECK-LABEL: icmp_sge: +; 
CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.ge LBB21_2 +; CHECK-NEXT: ; %bb.1: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB21_2: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret %1 = icmp sge i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -268,9 +449,16 @@ } define i32 @icmp_slt(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_slt -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.ge {{LBB.+_2}} +; CHECK-LABEL: icmp_slt: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.ge LBB22_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB22_2: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret %1 = icmp slt i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -280,9 +468,16 @@ } define i32 @icmp_sle(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_sle -; CHECK: cmp w0, w1 -; CHECK-NEXT: b.gt {{LBB.+_2}} +; CHECK-LABEL: icmp_sle: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: b.le LBB23_2 +; CHECK-NEXT: ; %bb.1: ; %bb2 +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB23_2: ; %bb1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret %1 = icmp sle i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: diff --git a/llvm/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/llvm/test/CodeGen/ARM/2011-12-14-machine-sink.ll --- a/llvm/test/CodeGen/ARM/2011-12-14-machine-sink.ll +++ b/llvm/test/CodeGen/ARM/2011-12-14-machine-sink.ll @@ -1,9 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; REQUIRES: asserts ; RUN: llc < %s -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS ; Radar 10266272 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-apple-ios4.0.0" -; STATS-NOT: machine-sink + +; STATS: 1 machine-cse - Number of common subexpression eliminated +; STATS: 1 machine-sink - Number of critical edges split +; STATS: 1 machine-sink - Number of 
machine instructions sunk + define i32 @foo(i32 %h, i32 %arg1) nounwind readonly ssp { entry: diff --git a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll --- a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll +++ b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll @@ -5,27 +5,27 @@ %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic %oldval = extractvalue { i32, i1 } %pair, 0 -; CHECK-NEXT: %bb.0: -; CHECK-NEXT: ldrex [[LOADED:r[0-9]+]], [r0] -; CHECK-NEXT: cmp [[LOADED]], r1 -; CHECK-NEXT: bne [[LDFAILBB:LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: %bb.1: -; CHECK-NEXT: dmb ish -; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r2, [r0] -; CHECK-NEXT: cmp [[SUCCESS]], #0 -; CHECK-NEXT: beq [[SUCCESSBB:LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: %bb.2: -; CHECK-NEXT: str r3, [r0] -; CHECK-NEXT: bx lr -; CHECK-NEXT: [[LDFAILBB]]: +; CHECK-NEXT: @ %bb.0: @ %cmpxchg.start +; CHECK-NEXT: ldrex r3, [r0] +; CHECK-NEXT: cmp r3, r1 +; CHECK-NEXT: beq LBB0_2 +; CHECK-NEXT: @ %bb.1: @ %cmpxchg.nostore ; CHECK-NEXT: clrex +; CHECK-NEXT: b LBB0_3 +; CHECK-NEXT: LBB0_2: @ %cmpxchg.fencedstore +; CHECK-NEXT: dmb ish +; CHECK-NEXT: strex r1, r2, [r0] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: beq LBB0_4 +; CHECK-NEXT: LBB0_3: @ %cmpxchg.end ; CHECK-NEXT: str r3, [r0] ; CHECK-NEXT: bx lr -; CHECK-NEXT: [[SUCCESSBB]]: +; CHECK-NEXT: LBB0_4: @ %cmpxchg.success ; CHECK-NEXT: dmb ish ; CHECK-NEXT: str r3, [r0] ; CHECK-NEXT: bx lr + store i32 %oldval, i32* %addr ret void } @@ -37,23 +37,24 @@ %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic %success = extractvalue { i32, i1 } %pair, 1 -; CHECK-NEXT: %bb.0: -; CHECK-NEXT: ldrex [[LOADED:r[0-9]+]], [r1] -; CHECK-NEXT: cmp [[LOADED]], r2 -; CHECK-NEXT: bne [[LDFAILBB:LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: %bb.1: -; CHECK-NEXT: dmb ish +; CHECK-NEXT: @ %bb.0: @ %cmpxchg.start +; CHECK-NEXT: ldrex r0, [r1] +; CHECK-NEXT: cmp r0, r2 +; CHECK-NEXT: beq LBB1_2 +; CHECK-NEXT: @ %bb.1: @ %cmpxchg.nostore ; CHECK-NEXT: mov r0, 
#0 -; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r3, [r1] -; CHECK-NEXT: cmp [[SUCCESS]], #0 +; CHECK-NEXT: clrex +; CHECK-NEXT: bx lr +; CHECK-NEXT: LBB1_2: @ %cmpxchg.fencedstore +; CHECK-NEXT: dmb ish +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: strex r2, r3, [r1] +; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: bxne lr ; CHECK-NEXT: mov r0, #1 ; CHECK-NEXT: dmb ish ; CHECK-NEXT: bx lr -; CHECK-NEXT: [[LDFAILBB]]: -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: clrex -; CHECK-NEXT: bx lr + ret i1 %success } diff --git a/llvm/test/CodeGen/ARM/lsr-unfolded-offset.ll b/llvm/test/CodeGen/ARM/lsr-unfolded-offset.ll --- a/llvm/test/CodeGen/ARM/lsr-unfolded-offset.ll +++ b/llvm/test/CodeGen/ARM/lsr-unfolded-offset.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -regalloc=greedy -arm-atomic-cfg-tidy=0 < %s | FileCheck %s ; LSR shouldn't introduce more induction variables than needed, increasing @@ -7,8 +8,11 @@ ; CHECK: sub sp, #{{40|36|32|28|24}} ; CHECK: %for.inc -; CHECK-NOT: ldr -; CHECK: add +; CHECK: adds r6, #1 +; CHECK: adds r4, #24 +; CHECK: cmp r1, r6 +; CHECK: bne LBB0_3 + target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" target triple = "thumbv7-apple-ios" diff --git a/llvm/test/CodeGen/ARM/machine-cse-cmp.ll b/llvm/test/CodeGen/ARM/machine-cse-cmp.ll --- a/llvm/test/CodeGen/ARM/machine-cse-cmp.ll +++ b/llvm/test/CodeGen/ARM/machine-cse-cmp.ll @@ -52,7 +52,7 @@ ; CHECK-LABEL: f3: ; CHECK-NOT: sub ; CHECK: cmp -; CHECK: blt +; CHECK: bge %0 = load i32, i32* %offset, align 4 %cmp = icmp slt i32 %0, %size %s = sub nsw i32 %0, %size diff --git a/llvm/test/CodeGen/Hexagon/newvaluejump2.ll b/llvm/test/CodeGen/Hexagon/newvaluejump2.ll --- a/llvm/test/CodeGen/Hexagon/newvaluejump2.ll +++ b/llvm/test/CodeGen/Hexagon/newvaluejump2.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc 
-march=hexagon -mcpu=hexagonv5 -disable-hexagon-misched < %s \ ; RUN: | FileCheck %s ; Check that we generate new value jump, both registers, with one @@ -5,8 +6,35 @@ @Reg = common global i32 0, align 4 define i32 @main() nounwind { +; CHECK-LABEL: main: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: { +; CHECK-NEXT: r1 = memw(gp+#Reg) +; CHECK-NEXT: allocframe(r29,#8):raw +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = memw(r29+#4) +; CHECK-NEXT: if (!cmp.gt(r0.new,r1)) jump:nt .LBB0_1 +; CHECK-NEXT: } +; CHECK-NEXT: // %bb.2: // %if.else +; CHECK-NEXT: { +; CHECK-NEXT: call baz +; CHECK-NEXT: r1:0 = combine(#20,#10) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = #0 +; CHECK-NEXT: dealloc_return +; CHECK-NEXT: } +; CHECK-NEXT: .LBB0_1: // %if.then +; CHECK-NEXT: { +; CHECK-NEXT: call bar +; CHECK-NEXT: r1:0 = combine(#2,#1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = #0 +; CHECK-NEXT: dealloc_return +; CHECK-NEXT: } entry: -; CHECK: if (cmp.gt(r{{[0-9]+}}.new,r{{[0-9]+}})) jump:{{[t|nt]}} .LBB{{[0-9]+}}_{{[0-9]+}} %Reg2 = alloca i32, align 4 %0 = load i32, i32* %Reg2, align 4 %1 = load i32, i32* @Reg, align 4 diff --git a/llvm/test/CodeGen/Mips/brcongt.ll b/llvm/test/CodeGen/Mips/brcongt.ll --- a/llvm/test/CodeGen/Mips/brcongt.ll +++ b/llvm/test/CodeGen/Mips/brcongt.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 @i = global i32 5, align 4 @@ -6,14 +7,32 @@ @result = global i32 0, align 4 define void @test() nounwind { +; 16-LABEL: test: +; 16: # %bb.0: # %entry +; 16-NEXT: lui $2, %hi(_gp_disp) +; 16-NEXT: addiu $2, $2, %lo(_gp_disp) +; 16-NEXT: li $2, %hi(_gp_disp) +; 16-NEXT: addiu $3, $pc, %lo(_gp_disp) +; 16-NEXT: sll $2, $2, 16 +; 16-NEXT: addu $2, $3, $2 +; 16-NEXT: lw $3, %got(i)($2) +; 16-NEXT: lw $4, %got(j)($2) +; 16-NEXT: lw $3, 0($3) +; 16-NEXT: lw $4, 0($4) +; 16-NEXT: slt $4, $3 
+; 16-NEXT: bteqz $BB0_2 # 16 bit inst +; 16-NEXT: # %bb.1: # %if.end +; 16-NEXT: jrc $ra +; 16-NEXT: $BB0_2: # %if.then +; 16-NEXT: lw $2, %got(result)($2) +; 16-NEXT: li $3, 1 +; 16-NEXT: sw $3, 0($2) +; 16-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4 %1 = load i32, i32* @j, align 4 %cmp = icmp sgt i32 %0, %1 br i1 %cmp, label %if.end, label %if.then -; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] -; 16: $[[LABEL]]: if.then: ; preds = %entry store i32 1, i32* @result, align 4 br label %if.end diff --git a/llvm/test/CodeGen/Mips/brconlt.ll b/llvm/test/CodeGen/Mips/brconlt.ll --- a/llvm/test/CodeGen/Mips/brconlt.ll +++ b/llvm/test/CodeGen/Mips/brconlt.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 ; RUN: llc -march=mips -mattr=micromips -mcpu=mips32r6 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=MM32R6 @@ -7,16 +8,52 @@ @result = global i32 0, align 4 define void @test() nounwind { +; 16-LABEL: test: +; 16: # %bb.0: # %entry +; 16-NEXT: lui $2, %hi(_gp_disp) +; 16-NEXT: addiu $2, $2, %lo(_gp_disp) +; 16-NEXT: li $2, %hi(_gp_disp) +; 16-NEXT: addiu $3, $pc, %lo(_gp_disp) +; 16-NEXT: sll $2, $2, 16 +; 16-NEXT: addu $2, $3, $2 +; 16-NEXT: lw $3, %got(i)($2) +; 16-NEXT: lw $4, %got(j)($2) +; 16-NEXT: lw $3, 0($3) +; 16-NEXT: lw $4, 0($4) +; 16-NEXT: slt $4, $3 +; 16-NEXT: bteqz $BB0_2 # 16 bit inst +; 16-NEXT: # %bb.1: # %if.end +; 16-NEXT: jrc $ra +; 16-NEXT: $BB0_2: # %if.then +; 16-NEXT: lw $2, %got(result)($2) +; 16-NEXT: li $3, 1 +; 16-NEXT: sw $3, 0($2) +; 16-NEXT: jrc $ra +; +; MM32R6-LABEL: test: +; MM32R6: # %bb.0: # %entry +; MM32R6-NEXT: lui $2, %hi(_gp_disp) +; MM32R6-NEXT: addiu $2, $2, %lo(_gp_disp) +; MM32R6-NEXT: addu $2, $2, $25 +; MM32R6-NEXT: lw $3, %got(i)($2) +; MM32R6-NEXT: lw $4, %got(j)($2) +; MM32R6-NEXT: lw16 $3, 0($3) +; MM32R6-NEXT: lw16 $4, 
0($4) +; MM32R6-NEXT: slt $1, $4, $3 +; MM32R6-NEXT: beqzc $1, $BB0_2 +; MM32R6-NEXT: # %bb.1: # %if.end +; MM32R6-NEXT: jrc $ra +; MM32R6-NEXT: $BB0_2: # %if.then +; MM32R6-NEXT: lw $2, %got(result)($2) +; MM32R6-NEXT: li16 $3, 1 +; MM32R6-NEXT: sw16 $3, 0($2) +; MM32R6-NEXT: jrc $ra entry: %0 = load i32, i32* @j, align 4 %1 = load i32, i32* @i, align 4 %cmp = icmp slt i32 %0, %1 br i1 %cmp, label %if.end, label %if.then -; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; MM32R6: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] -; 16: $[[LABEL]]: if.then: ; preds = %entry store i32 1, i32* @result, align 4 diff --git a/llvm/test/CodeGen/Mips/brconne.ll b/llvm/test/CodeGen/Mips/brconne.ll --- a/llvm/test/CodeGen/Mips/brconne.ll +++ b/llvm/test/CodeGen/Mips/brconne.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 @i = global i32 5, align 4 @@ -5,15 +6,32 @@ @result = global i32 0, align 4 define void @test() nounwind { +; 16-LABEL: test: +; 16: # %bb.0: # %entry +; 16-NEXT: lui $2, %hi(_gp_disp) +; 16-NEXT: addiu $2, $2, %lo(_gp_disp) +; 16-NEXT: li $2, %hi(_gp_disp) +; 16-NEXT: addiu $3, $pc, %lo(_gp_disp) +; 16-NEXT: sll $2, $2, 16 +; 16-NEXT: addu $2, $3, $2 +; 16-NEXT: lw $3, %got(i)($2) +; 16-NEXT: lw $4, %got(j)($2) +; 16-NEXT: lw $3, 0($3) +; 16-NEXT: lw $4, 0($4) +; 16-NEXT: cmp $4, $3 +; 16-NEXT: bteqz $BB0_2 # 16 bit inst +; 16-NEXT: # %bb.1: # %if.end +; 16-NEXT: jrc $ra +; 16-NEXT: $BB0_2: # %if.then +; 16-NEXT: lw $2, %got(result)($2) +; 16-NEXT: li $3, 1 +; 16-NEXT: sw $3, 0($2) +; 16-NEXT: jrc $ra entry: %0 = load i32, i32* @j, align 4 %1 = load i32, i32* @i, align 4 %cmp = icmp eq i32 %0, %1 br i1 %cmp, label %if.then, label %if.end -; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]] -; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}}) -; 16: $[[LABEL]]: if.then: ; preds = %entry 
store i32 1, i32* @result, align 4 diff --git a/llvm/test/CodeGen/Mips/compactbranches/no-beqzc-bnezc.ll b/llvm/test/CodeGen/Mips/compactbranches/no-beqzc-bnezc.ll --- a/llvm/test/CodeGen/Mips/compactbranches/no-beqzc-bnezc.ll +++ b/llvm/test/CodeGen/Mips/compactbranches/no-beqzc-bnezc.ll @@ -1,16 +1,57 @@ -; RUN: llc -march=mipsel -mcpu=mips32r6 -disable-mips-delay-filler < %s | FileCheck %s -; RUN: llc -march=mips -mcpu=mips32r6 -disable-mips-delay-filler < %s -filetype=obj \ -; RUN: -o - | llvm-objdump -d - | FileCheck %s --check-prefix=ENCODING -; RUN: llc -march=mipsel -mcpu=mips64r6 -disable-mips-delay-filler -target-abi=n64 < %s | FileCheck %s -; RUN: llc -march=mips -mcpu=mips64r6 -disable-mips-delay-filler -target-abi=n64 < %s -filetype=obj \ -; RUN: -o - | llvm-objdump -d - | FileCheck %s --check-prefix=ENCODING +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=mipsel -mcpu=mips32r6 -disable-mips-delay-filler < %s | FileCheck %s --check-prefixes=ENCODING,MIPSEL32 +; RUN: llc -march=mips -mcpu=mips32r6 -disable-mips-delay-filler < %s | FileCheck %s --check-prefixes=ENCODING,MIPS32 +; RUN: llc -march=mipsel -mcpu=mips64r6 -disable-mips-delay-filler -target-abi=n64 < %s | FileCheck %s --check-prefixes=ENCODING,MIPSEL64 +; RUN: llc -march=mips -mcpu=mips64r6 -disable-mips-delay-filler -target-abi=n64 < %s | FileCheck %s --check-prefixes=ENCODING,MIPS64 ; bnezc and beqzc have restriction that $rt != 0 define i32 @f() { ; CHECK-LABEL: f: ; CHECK-NOT: bnezc $0 - +; MIPSEL32-LABEL: f: +; MIPSEL32: # %bb.0: +; MIPSEL32-NEXT: bnez $zero, $BB0_2 +; MIPSEL32-NEXT: nop +; MIPSEL32-NEXT: # %bb.1: # %if.then +; MIPSEL32-NEXT: addiu $2, $zero, 1 +; MIPSEL32-NEXT: jrc $ra +; MIPSEL32-NEXT: $BB0_2: # %if.end +; MIPSEL32-NEXT: addiu $2, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; +; MIPS32-LABEL: f: +; MIPS32: # %bb.0: +; MIPS32-NEXT: bnez $zero, $BB0_2 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.1: # %if.then +; MIPS32-NEXT: addiu 
$2, $zero, 1 +; MIPS32-NEXT: jrc $ra +; MIPS32-NEXT: $BB0_2: # %if.end +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: jrc $ra +; +; MIPSEL64-LABEL: f: +; MIPSEL64: # %bb.0: +; MIPSEL64-NEXT: bnez $zero, .LBB0_2 +; MIPSEL64-NEXT: nop +; MIPSEL64-NEXT: # %bb.1: # %if.then +; MIPSEL64-NEXT: addiu $2, $zero, 1 +; MIPSEL64-NEXT: jrc $ra +; MIPSEL64-NEXT: .LBB0_2: # %if.end +; MIPSEL64-NEXT: addiu $2, $zero, 0 +; MIPSEL64-NEXT: jrc $ra +; +; MIPS64-LABEL: f: +; MIPS64: # %bb.0: +; MIPS64-NEXT: bnez $zero, .LBB0_2 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.1: # %if.then +; MIPS64-NEXT: addiu $2, $zero, 1 +; MIPS64-NEXT: jrc $ra +; MIPS64-NEXT: .LBB0_2: # %if.end +; MIPS64-NEXT: addiu $2, $zero, 0 +; MIPS64-NEXT: jrc $ra %cmp = icmp eq i32 1, 1 br i1 %cmp, label %if.then, label %if.end @@ -24,7 +65,49 @@ define i32 @f1() { ; CHECK-LABEL: f1: ; CHECK-NOT: beqzc $0 - +; MIPSEL32-LABEL: f1: +; MIPSEL32: # %bb.0: +; MIPSEL32-NEXT: b $BB1_2 +; MIPSEL32-NEXT: nop +; MIPSEL32-NEXT: # %bb.1: # %if.end +; MIPSEL32-NEXT: addiu $2, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; MIPSEL32-NEXT: $BB1_2: # %if.then +; MIPSEL32-NEXT: addiu $2, $zero, 1 +; MIPSEL32-NEXT: jrc $ra +; +; MIPS32-LABEL: f1: +; MIPS32: # %bb.0: +; MIPS32-NEXT: b $BB1_2 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.1: # %if.end +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: jrc $ra +; MIPS32-NEXT: $BB1_2: # %if.then +; MIPS32-NEXT: addiu $2, $zero, 1 +; MIPS32-NEXT: jrc $ra +; +; MIPSEL64-LABEL: f1: +; MIPSEL64: # %bb.0: +; MIPSEL64-NEXT: b .LBB1_2 +; MIPSEL64-NEXT: nop +; MIPSEL64-NEXT: # %bb.1: # %if.end +; MIPSEL64-NEXT: addiu $2, $zero, 0 +; MIPSEL64-NEXT: jrc $ra +; MIPSEL64-NEXT: .LBB1_2: # %if.then +; MIPSEL64-NEXT: addiu $2, $zero, 1 +; MIPSEL64-NEXT: jrc $ra +; +; MIPS64-LABEL: f1: +; MIPS64: # %bb.0: +; MIPS64-NEXT: b .LBB1_2 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.1: # %if.end +; MIPS64-NEXT: addiu $2, $zero, 0 +; MIPS64-NEXT: jrc $ra +; MIPS64-NEXT: .LBB1_2: # %if.then +; MIPS64-NEXT: addiu $2, $zero, 1 
+; MIPS64-NEXT: jrc $ra %cmp = icmp eq i32 0, 0 br i1 %cmp, label %if.then, label %if.end @@ -40,9 +123,49 @@ ; beqc and bnec have the restriction that $rs < $rt. define i32 @f2(i32 %a, i32 %b) { -; ENCODING-LABEL: : -; ENCODING-NOT: beqc $5, $4 -; ENCODING-NOT: bnec $5, $4 +; MIPSEL32-LABEL: f2: +; MIPSEL32: # %bb.0: +; MIPSEL32-NEXT: beqc $5, $4, $BB2_2 +; MIPSEL32-NEXT: # %bb.1: # %if.end +; MIPSEL32-NEXT: addiu $2, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; MIPSEL32-NEXT: $BB2_2: # %if.then +; MIPSEL32-NEXT: addiu $2, $zero, 1 +; MIPSEL32-NEXT: jrc $ra +; +; MIPS32-LABEL: f2: +; MIPS32: # %bb.0: +; MIPS32-NEXT: beqc $5, $4, $BB2_2 +; MIPS32-NEXT: # %bb.1: # %if.end +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: jrc $ra +; MIPS32-NEXT: $BB2_2: # %if.then +; MIPS32-NEXT: addiu $2, $zero, 1 +; MIPS32-NEXT: jrc $ra +; +; MIPSEL64-LABEL: f2: +; MIPSEL64: # %bb.0: +; MIPSEL64-NEXT: sll $1, $4, 0 +; MIPSEL64-NEXT: sll $2, $5, 0 +; MIPSEL64-NEXT: beqc $2, $1, .LBB2_2 +; MIPSEL64-NEXT: # %bb.1: # %if.end +; MIPSEL64-NEXT: addiu $2, $zero, 0 +; MIPSEL64-NEXT: jrc $ra +; MIPSEL64-NEXT: .LBB2_2: # %if.then +; MIPSEL64-NEXT: addiu $2, $zero, 1 +; MIPSEL64-NEXT: jrc $ra +; +; MIPS64-LABEL: f2: +; MIPS64: # %bb.0: +; MIPS64-NEXT: sll $1, $4, 0 +; MIPS64-NEXT: sll $2, $5, 0 +; MIPS64-NEXT: beqc $2, $1, .LBB2_2 +; MIPS64-NEXT: # %bb.1: # %if.end +; MIPS64-NEXT: addiu $2, $zero, 0 +; MIPS64-NEXT: jrc $ra +; MIPS64-NEXT: .LBB2_2: # %if.then +; MIPS64-NEXT: addiu $2, $zero, 1 +; MIPS64-NEXT: jrc $ra %cmp = icmp eq i32 %b, %a br i1 %cmp, label %if.then, label %if.end @@ -57,7 +180,53 @@ define i64 @f3() { ; CHECK-LABEL: f3: ; CHECK-NOT: bnezc $0 - +; MIPSEL32-LABEL: f3: +; MIPSEL32: # %bb.0: +; MIPSEL32-NEXT: bnez $zero, $BB3_2 +; MIPSEL32-NEXT: nop +; MIPSEL32-NEXT: # %bb.1: # %if.then +; MIPSEL32-NEXT: addiu $2, $zero, 1 +; MIPSEL32-NEXT: addiu $3, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; MIPSEL32-NEXT: $BB3_2: # %if.end +; MIPSEL32-NEXT: addiu $2, $zero, 0 +; MIPSEL32-NEXT: addiu 
$3, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; +; MIPS32-LABEL: f3: +; MIPS32: # %bb.0: +; MIPS32-NEXT: bnez $zero, $BB3_2 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.1: # %if.then +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: addiu $3, $zero, 1 +; MIPS32-NEXT: jrc $ra +; MIPS32-NEXT: $BB3_2: # %if.end +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: addiu $3, $zero, 0 +; MIPS32-NEXT: jrc $ra +; +; MIPSEL64-LABEL: f3: +; MIPSEL64: # %bb.0: +; MIPSEL64-NEXT: bnez $zero, .LBB3_2 +; MIPSEL64-NEXT: nop +; MIPSEL64-NEXT: # %bb.1: # %if.then +; MIPSEL64-NEXT: daddiu $2, $zero, 1 +; MIPSEL64-NEXT: jrc $ra +; MIPSEL64-NEXT: .LBB3_2: # %if.end +; MIPSEL64-NEXT: daddiu $2, $zero, 0 +; MIPSEL64-NEXT: jrc $ra +; +; MIPS64-LABEL: f3: +; MIPS64: # %bb.0: +; MIPS64-NEXT: bnez $zero, .LBB3_2 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.1: # %if.then +; MIPS64-NEXT: daddiu $2, $zero, 1 +; MIPS64-NEXT: jrc $ra +; MIPS64-NEXT: .LBB3_2: # %if.end +; MIPS64-NEXT: daddiu $2, $zero, 0 +; MIPS64-NEXT: jrc $ra %cmp = icmp eq i64 1, 1 br i1 %cmp, label %if.then, label %if.end @@ -71,7 +240,53 @@ define i64 @f4() { ; CHECK-LABEL: f4: ; CHECK-NOT: beqzc $0 - +; MIPSEL32-LABEL: f4: +; MIPSEL32: # %bb.0: +; MIPSEL32-NEXT: b $BB4_2 +; MIPSEL32-NEXT: nop +; MIPSEL32-NEXT: # %bb.1: # %if.end +; MIPSEL32-NEXT: addiu $2, $zero, 0 +; MIPSEL32-NEXT: addiu $3, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; MIPSEL32-NEXT: $BB4_2: # %if.then +; MIPSEL32-NEXT: addiu $2, $zero, 1 +; MIPSEL32-NEXT: addiu $3, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; +; MIPS32-LABEL: f4: +; MIPS32: # %bb.0: +; MIPS32-NEXT: b $BB4_2 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.1: # %if.end +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: addiu $3, $zero, 0 +; MIPS32-NEXT: jrc $ra +; MIPS32-NEXT: $BB4_2: # %if.then +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: addiu $3, $zero, 1 +; MIPS32-NEXT: jrc $ra +; +; MIPSEL64-LABEL: f4: +; MIPSEL64: # %bb.0: +; MIPSEL64-NEXT: b .LBB4_2 +; MIPSEL64-NEXT: nop +; MIPSEL64-NEXT: # %bb.1: # %if.end +; 
MIPSEL64-NEXT: daddiu $2, $zero, 0 +; MIPSEL64-NEXT: jrc $ra +; MIPSEL64-NEXT: .LBB4_2: # %if.then +; MIPSEL64-NEXT: daddiu $2, $zero, 1 +; MIPSEL64-NEXT: jrc $ra +; +; MIPS64-LABEL: f4: +; MIPS64: # %bb.0: +; MIPS64-NEXT: b .LBB4_2 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.1: # %if.end +; MIPS64-NEXT: daddiu $2, $zero, 0 +; MIPS64-NEXT: jrc $ra +; MIPS64-NEXT: .LBB4_2: # %if.then +; MIPS64-NEXT: daddiu $2, $zero, 1 +; MIPS64-NEXT: jrc $ra %cmp = icmp eq i64 0, 0 br i1 %cmp, label %if.then, label %if.end @@ -87,9 +302,55 @@ ; beqc and bnec have the restriction that $rs < $rt. define i64 @f5(i64 %a, i64 %b) { -; ENCODING-LABEL: : -; ENCODING-NOT: beqc $5, $4 -; ENCODING-NOT: bnec $5, $4 +; MIPSEL32-LABEL: f5: +; MIPSEL32: # %bb.0: +; MIPSEL32-NEXT: xor $1, $7, $5 +; MIPSEL32-NEXT: xor $2, $6, $4 +; MIPSEL32-NEXT: or $1, $2, $1 +; MIPSEL32-NEXT: beqzc $1, $BB5_2 +; MIPSEL32-NEXT: # %bb.1: # %if.end +; MIPSEL32-NEXT: addiu $2, $zero, 0 +; MIPSEL32-NEXT: addiu $3, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; MIPSEL32-NEXT: $BB5_2: # %if.then +; MIPSEL32-NEXT: addiu $2, $zero, 1 +; MIPSEL32-NEXT: addiu $3, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; +; MIPS32-LABEL: f5: +; MIPS32: # %bb.0: +; MIPS32-NEXT: xor $1, $6, $4 +; MIPS32-NEXT: xor $2, $7, $5 +; MIPS32-NEXT: or $1, $2, $1 +; MIPS32-NEXT: beqzc $1, $BB5_2 +; MIPS32-NEXT: # %bb.1: # %if.end +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: addiu $3, $zero, 0 +; MIPS32-NEXT: jrc $ra +; MIPS32-NEXT: $BB5_2: # %if.then +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: addiu $3, $zero, 1 +; MIPS32-NEXT: jrc $ra +; +; MIPSEL64-LABEL: f5: +; MIPSEL64: # %bb.0: +; MIPSEL64-NEXT: beqc $5, $4, .LBB5_2 +; MIPSEL64-NEXT: # %bb.1: # %if.end +; MIPSEL64-NEXT: daddiu $2, $zero, 0 +; MIPSEL64-NEXT: jrc $ra +; MIPSEL64-NEXT: .LBB5_2: # %if.then +; MIPSEL64-NEXT: daddiu $2, $zero, 1 +; MIPSEL64-NEXT: jrc $ra +; +; MIPS64-LABEL: f5: +; MIPS64: # %bb.0: +; MIPS64-NEXT: beqc $5, $4, .LBB5_2 +; MIPS64-NEXT: # %bb.1: # %if.end +; MIPS64-NEXT: 
daddiu $2, $zero, 0 +; MIPS64-NEXT: jrc $ra +; MIPS64-NEXT: .LBB5_2: # %if.then +; MIPS64-NEXT: daddiu $2, $zero, 1 +; MIPS64-NEXT: jrc $ra %cmp = icmp eq i64 %b, %a br i1 %cmp, label %if.then, label %if.end @@ -104,7 +365,47 @@ define i32 @f6(i32 %a) { ; CHECK-LABEL: f6: ; CHECK: beqzc ${{[0-9]+}}, {{((\$)|(\.L))}}BB - +; MIPSEL32-LABEL: f6: +; MIPSEL32: # %bb.0: +; MIPSEL32-NEXT: beqzc $4, $BB6_2 +; MIPSEL32-NEXT: # %bb.1: # %if.end +; MIPSEL32-NEXT: addiu $2, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; MIPSEL32-NEXT: $BB6_2: # %if.then +; MIPSEL32-NEXT: addiu $2, $zero, 1 +; MIPSEL32-NEXT: jrc $ra +; +; MIPS32-LABEL: f6: +; MIPS32: # %bb.0: +; MIPS32-NEXT: beqzc $4, $BB6_2 +; MIPS32-NEXT: # %bb.1: # %if.end +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: jrc $ra +; MIPS32-NEXT: $BB6_2: # %if.then +; MIPS32-NEXT: addiu $2, $zero, 1 +; MIPS32-NEXT: jrc $ra +; +; MIPSEL64-LABEL: f6: +; MIPSEL64: # %bb.0: +; MIPSEL64-NEXT: sll $1, $4, 0 +; MIPSEL64-NEXT: beqzc $1, .LBB6_2 +; MIPSEL64-NEXT: # %bb.1: # %if.end +; MIPSEL64-NEXT: addiu $2, $zero, 0 +; MIPSEL64-NEXT: jrc $ra +; MIPSEL64-NEXT: .LBB6_2: # %if.then +; MIPSEL64-NEXT: addiu $2, $zero, 1 +; MIPSEL64-NEXT: jrc $ra +; +; MIPS64-LABEL: f6: +; MIPS64: # %bb.0: +; MIPS64-NEXT: sll $1, $4, 0 +; MIPS64-NEXT: beqzc $1, .LBB6_2 +; MIPS64-NEXT: # %bb.1: # %if.end +; MIPS64-NEXT: addiu $2, $zero, 0 +; MIPS64-NEXT: jrc $ra +; MIPS64-NEXT: .LBB6_2: # %if.then +; MIPS64-NEXT: addiu $2, $zero, 1 +; MIPS64-NEXT: jrc $ra %cmp = icmp eq i32 %a, 0 br i1 %cmp, label %if.then, label %if.end @@ -118,7 +419,47 @@ define i32 @f7(i32 %a) { ; CHECK-LABEL: f7: ; CHECK: bnezc ${{[0-9]+}}, {{((\$)|(\.L))}}BB - +; MIPSEL32-LABEL: f7: +; MIPSEL32: # %bb.0: +; MIPSEL32-NEXT: beqzc $4, $BB7_2 +; MIPSEL32-NEXT: # %bb.1: # %if.end +; MIPSEL32-NEXT: addiu $2, $zero, 0 +; MIPSEL32-NEXT: jrc $ra +; MIPSEL32-NEXT: $BB7_2: # %if.then +; MIPSEL32-NEXT: addiu $2, $zero, 1 +; MIPSEL32-NEXT: jrc $ra +; +; MIPS32-LABEL: f7: +; MIPS32: # %bb.0: +; 
MIPS32-NEXT: beqzc $4, $BB7_2 +; MIPS32-NEXT: # %bb.1: # %if.end +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: jrc $ra +; MIPS32-NEXT: $BB7_2: # %if.then +; MIPS32-NEXT: addiu $2, $zero, 1 +; MIPS32-NEXT: jrc $ra +; +; MIPSEL64-LABEL: f7: +; MIPSEL64: # %bb.0: +; MIPSEL64-NEXT: sll $1, $4, 0 +; MIPSEL64-NEXT: beqzc $1, .LBB7_2 +; MIPSEL64-NEXT: # %bb.1: # %if.end +; MIPSEL64-NEXT: addiu $2, $zero, 0 +; MIPSEL64-NEXT: jrc $ra +; MIPSEL64-NEXT: .LBB7_2: # %if.then +; MIPSEL64-NEXT: addiu $2, $zero, 1 +; MIPSEL64-NEXT: jrc $ra +; +; MIPS64-LABEL: f7: +; MIPS64: # %bb.0: +; MIPS64-NEXT: sll $1, $4, 0 +; MIPS64-NEXT: beqzc $1, .LBB7_2 +; MIPS64-NEXT: # %bb.1: # %if.end +; MIPS64-NEXT: addiu $2, $zero, 0 +; MIPS64-NEXT: jrc $ra +; MIPS64-NEXT: .LBB7_2: # %if.then +; MIPS64-NEXT: addiu $2, $zero, 1 +; MIPS64-NEXT: jrc $ra %cmp = icmp eq i32 0, %a br i1 %cmp, label %if.then, label %if.end diff --git a/llvm/test/CodeGen/Mips/lcb2.ll b/llvm/test/CodeGen/Mips/lcb2.ll --- a/llvm/test/CodeGen/Mips/lcb2.ll +++ b/llvm/test/CodeGen/Mips/lcb2.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands=true < %s | FileCheck %s -check-prefix=lcb ; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands=true < %s | FileCheck %s -check-prefix=lcbn @@ -8,6 +9,51 @@ ; Function Attrs: nounwind optsize define i32 @bnez() #0 { +; lcb-LABEL: bnez: +; lcb: # %bb.0: # %entry +; lcb-NEXT: li $2, %hi(i) +; lcb-NEXT: sll $2, $2, 16 +; lcb-NEXT: lw $3, %lo(i)($2) +; lcb-NEXT: bnez $3, $BB0_2 +; lcb-NEXT: # %bb.1: # %if.then +; lcb-NEXT: li $3, 0 +; lcb-NEXT: #APP +; lcb-NEXT: .set push +; lcb-NEXT: .set at +; lcb-NEXT: .set macro +; lcb-NEXT: .set reorder +; lcb-EMPTY: +; lcb-NEXT: .space 10000 +; lcb-EMPTY: +; 
lcb-NEXT: .set pop +; lcb-NEXT: #NO_APP +; lcb-NEXT: sw $3, %lo(i)($2) +; lcb-NEXT: $BB0_2: # %if.end +; lcb-NEXT: li $2, 0 +; lcb-NEXT: jrc $ra +; +; lcbn-LABEL: bnez: +; lcbn: # %bb.0: # %entry +; lcbn-NEXT: li $2, %hi(i) +; lcbn-NEXT: sll $2, $2, 16 +; lcbn-NEXT: lw $3, %lo(i)($2) +; lcbn-NEXT: bnez $3, $BB0_2 +; lcbn-NEXT: # %bb.1: # %if.then +; lcbn-NEXT: li $3, 0 +; lcbn-NEXT: #APP +; lcbn-NEXT: .set push +; lcbn-NEXT: .set at +; lcbn-NEXT: .set macro +; lcbn-NEXT: .set reorder +; lcbn-EMPTY: +; lcbn-NEXT: .space 10000 +; lcbn-EMPTY: +; lcbn-NEXT: .set pop +; lcbn-NEXT: #NO_APP +; lcbn-NEXT: sw $3, %lo(i)($2) +; lcbn-NEXT: $BB0_2: # %if.end +; lcbn-NEXT: li $2, 0 +; lcbn-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %cmp = icmp eq i32 %0, 0 @@ -21,15 +67,90 @@ if.end: ; preds = %if.then, %entry ret i32 0 } -; lcb: .ent bnez -; lcbn: .ent bnez -; lcb: bnez ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}} -; lcbn-NOT: bnez ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst -; lcb: .end bnez -; lcbn: .end bnez ; Function Attrs: nounwind optsize define i32 @beqz() #0 { +; lcb-LABEL: beqz: +; lcb: # %bb.0: # %entry +; lcb-NEXT: li $2, %hi(i) +; lcb-NEXT: sll $2, $2, 16 +; lcb-NEXT: lw $2, %lo(i)($2) +; lcb-NEXT: beqz $2, $BB1_2 +; lcb-NEXT: # %bb.1: # %if.else +; lcb-NEXT: li $2, %hi(j) +; lcb-NEXT: sll $2, $2, 16 +; lcb-NEXT: li $3, 55 +; lcb-NEXT: sw $3, %lo(j)($2) +; lcb-NEXT: #APP +; lcb-NEXT: .set push +; lcb-NEXT: .set at +; lcb-NEXT: .set macro +; lcb-NEXT: .set reorder +; lcb-EMPTY: +; lcb-NEXT: .space 10000 +; lcb-EMPTY: +; lcb-NEXT: .set pop +; lcb-NEXT: #NO_APP +; lcb-NEXT: b $BB1_3 +; lcb-NEXT: $BB1_2: # %if.then +; lcb-NEXT: li $2, %hi(j) +; lcb-NEXT: sll $2, $2, 16 +; lcb-NEXT: li $3, 10 +; lcb-NEXT: sw $3, %lo(j)($2) +; lcb-NEXT: #APP +; lcb-NEXT: .set push +; lcb-NEXT: .set at +; lcb-NEXT: .set macro +; lcb-NEXT: .set reorder +; lcb-EMPTY: +; lcb-NEXT: .space 10000 +; lcb-EMPTY: +; lcb-NEXT: .set pop +; lcb-NEXT: #NO_APP +; lcb-NEXT: $BB1_3: 
# %if.end +; lcb-NEXT: li $2, 0 +; lcb-NEXT: jrc $ra +; +; lcbn-LABEL: beqz: +; lcbn: # %bb.0: # %entry +; lcbn-NEXT: li $2, %hi(i) +; lcbn-NEXT: sll $2, $2, 16 +; lcbn-NEXT: lw $2, %lo(i)($2) +; lcbn-NEXT: beqz $2, $BB1_2 +; lcbn-NEXT: # %bb.1: # %if.else +; lcbn-NEXT: li $2, %hi(j) +; lcbn-NEXT: sll $2, $2, 16 +; lcbn-NEXT: li $3, 55 +; lcbn-NEXT: sw $3, %lo(j)($2) +; lcbn-NEXT: #APP +; lcbn-NEXT: .set push +; lcbn-NEXT: .set at +; lcbn-NEXT: .set macro +; lcbn-NEXT: .set reorder +; lcbn-EMPTY: +; lcbn-NEXT: .space 10000 +; lcbn-EMPTY: +; lcbn-NEXT: .set pop +; lcbn-NEXT: #NO_APP +; lcbn-NEXT: b $BB1_3 +; lcbn-NEXT: $BB1_2: # %if.then +; lcbn-NEXT: li $2, %hi(j) +; lcbn-NEXT: sll $2, $2, 16 +; lcbn-NEXT: li $3, 10 +; lcbn-NEXT: sw $3, %lo(j)($2) +; lcbn-NEXT: #APP +; lcbn-NEXT: .set push +; lcbn-NEXT: .set at +; lcbn-NEXT: .set macro +; lcbn-NEXT: .set reorder +; lcbn-EMPTY: +; lcbn-NEXT: .space 10000 +; lcbn-EMPTY: +; lcbn-NEXT: .set pop +; lcbn-NEXT: #NO_APP +; lcbn-NEXT: $BB1_3: # %if.end +; lcbn-NEXT: li $2, 0 +; lcbn-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %cmp = icmp eq i32 %0, 0 @@ -49,16 +170,93 @@ ret i32 0 } -; lcb: .ent beqz -; lcbn: .ent beqz -; lcb: beqz ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}} -; lcbn-NOT: beqz ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst -; lcb: .end beqz -; lcbn: .end beqz - - ; Function Attrs: nounwind optsize define void @bteqz() #0 { +; lcb-LABEL: bteqz: +; lcb: # %bb.0: # %entry +; lcb-NEXT: li $2, %hi(j) +; lcb-NEXT: sll $2, $2, 16 +; lcb-NEXT: lw $2, %lo(j)($2) +; lcb-NEXT: li $3, %hi(i) +; lcb-NEXT: sll $3, $3, 16 +; lcb-NEXT: lw $3, %lo(i)($3) +; lcb-NEXT: cmp $3, $2 +; lcb-NEXT: bteqz $BB2_2 +; lcb-NEXT: # %bb.1: # %if.else +; lcb-NEXT: li $2, %hi(k) +; lcb-NEXT: sll $2, $2, 16 +; lcb-NEXT: #APP +; lcb-NEXT: .set push +; lcb-NEXT: .set at +; lcb-NEXT: .set macro +; lcb-NEXT: .set reorder +; lcb-EMPTY: +; lcb-NEXT: .space 1000 +; lcb-EMPTY: +; lcb-NEXT: .set pop +; lcb-NEXT: #NO_APP +; lcb-NEXT: 
li $3, 2 +; lcb-NEXT: sw $3, %lo(k)($2) +; lcb-NEXT: jrc $ra +; lcb-NEXT: $BB2_2: # %if.then +; lcb-NEXT: li $2, %hi(k) +; lcb-NEXT: sll $2, $2, 16 +; lcb-NEXT: li $3, 1 +; lcb-NEXT: sw $3, %lo(k)($2) +; lcb-NEXT: #APP +; lcb-NEXT: .set push +; lcb-NEXT: .set at +; lcb-NEXT: .set macro +; lcb-NEXT: .set reorder +; lcb-EMPTY: +; lcb-NEXT: .space 1000 +; lcb-EMPTY: +; lcb-NEXT: .set pop +; lcb-NEXT: #NO_APP +; lcb-NEXT: jrc $ra +; +; lcbn-LABEL: bteqz: +; lcbn: # %bb.0: # %entry +; lcbn-NEXT: li $2, %hi(j) +; lcbn-NEXT: sll $2, $2, 16 +; lcbn-NEXT: lw $2, %lo(j)($2) +; lcbn-NEXT: li $3, %hi(i) +; lcbn-NEXT: sll $3, $3, 16 +; lcbn-NEXT: lw $3, %lo(i)($3) +; lcbn-NEXT: cmp $3, $2 +; lcbn-NEXT: bteqz $BB2_2 +; lcbn-NEXT: # %bb.1: # %if.else +; lcbn-NEXT: li $2, %hi(k) +; lcbn-NEXT: sll $2, $2, 16 +; lcbn-NEXT: #APP +; lcbn-NEXT: .set push +; lcbn-NEXT: .set at +; lcbn-NEXT: .set macro +; lcbn-NEXT: .set reorder +; lcbn-EMPTY: +; lcbn-NEXT: .space 1000 +; lcbn-EMPTY: +; lcbn-NEXT: .set pop +; lcbn-NEXT: #NO_APP +; lcbn-NEXT: li $3, 2 +; lcbn-NEXT: sw $3, %lo(k)($2) +; lcbn-NEXT: jrc $ra +; lcbn-NEXT: $BB2_2: # %if.then +; lcbn-NEXT: li $2, %hi(k) +; lcbn-NEXT: sll $2, $2, 16 +; lcbn-NEXT: li $3, 1 +; lcbn-NEXT: sw $3, %lo(k)($2) +; lcbn-NEXT: #APP +; lcbn-NEXT: .set push +; lcbn-NEXT: .set at +; lcbn-NEXT: .set macro +; lcbn-NEXT: .set reorder +; lcbn-EMPTY: +; lcbn-NEXT: .space 1000 +; lcbn-EMPTY: +; lcbn-NEXT: .set pop +; lcbn-NEXT: #NO_APP +; lcbn-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %1 = load i32, i32* @j, align 4, !tbaa !1 @@ -79,16 +277,65 @@ ret void } -; lcb: .ent bteqz -; lcbn: .ent bteqz -; lcb: btnez $BB{{[0-9]+}}_{{[0-9]+}} -; lcbn-NOT: btnez $BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst -; lcb: .end bteqz -; lcbn: .end bteqz - - ; Function Attrs: nounwind optsize define void @btz() #0 { +; lcb-LABEL: btz: +; lcb: # %bb.0: # %entry +; lcb-NEXT: li $2, %hi(i) +; lcb-NEXT: sll $2, $2, 16 +; lcb-NEXT: lw $4, %lo(i)($2) +; lcb-NEXT: li $3, 
%hi(j) +; lcb-NEXT: sll $3, $3, 16 +; lcb-NEXT: lw $5, %lo(j)($3) +; lcb-NEXT: slt $5, $4 +; lcb-NEXT: bteqz $BB3_2 +; lcb-NEXT: $BB3_1: # %if.then +; lcb-NEXT: # =>This Inner Loop Header: Depth=1 +; lcb-NEXT: #APP +; lcb-NEXT: .set push +; lcb-NEXT: .set at +; lcb-NEXT: .set macro +; lcb-NEXT: .set reorder +; lcb-EMPTY: +; lcb-NEXT: .space 60000 +; lcb-EMPTY: +; lcb-NEXT: .set pop +; lcb-NEXT: #NO_APP +; lcb-NEXT: lw $4, %lo(i)($2) +; lcb-NEXT: lw $5, %lo(j)($3) +; lcb-NEXT: slt $5, $4 +; lcb-NEXT: btnez $BB3_1 +; lcb-NEXT: $BB3_2: # %if.end +; lcb-NEXT: jrc $ra +; +; lcbn-LABEL: btz: +; lcbn: # %bb.0: # %entry +; lcbn-NEXT: li $2, %hi(i) +; lcbn-NEXT: sll $2, $2, 16 +; lcbn-NEXT: lw $4, %lo(i)($2) +; lcbn-NEXT: li $3, %hi(j) +; lcbn-NEXT: sll $3, $3, 16 +; lcbn-NEXT: lw $5, %lo(j)($3) +; lcbn-NEXT: slt $5, $4 +; lcbn-NEXT: bteqz $BB3_2 +; lcbn-NEXT: $BB3_1: # %if.then +; lcbn-NEXT: # =>This Inner Loop Header: Depth=1 +; lcbn-NEXT: #APP +; lcbn-NEXT: .set push +; lcbn-NEXT: .set at +; lcbn-NEXT: .set macro +; lcbn-NEXT: .set reorder +; lcbn-EMPTY: +; lcbn-NEXT: .space 60000 +; lcbn-EMPTY: +; lcbn-NEXT: .set pop +; lcbn-NEXT: #NO_APP +; lcbn-NEXT: lw $4, %lo(i)($2) +; lcbn-NEXT: lw $5, %lo(j)($3) +; lcbn-NEXT: slt $5, $4 +; lcbn-NEXT: btnez $BB3_1 +; lcbn-NEXT: $BB3_2: # %if.end +; lcbn-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %1 = load i32, i32* @j, align 4, !tbaa !1 @@ -106,15 +353,6 @@ ret void } -; lcb: .ent btz -; lcbn: .ent btz -; lcb: bteqz $BB{{[0-9]+}}_{{[0-9]+}} -; lcbn-NOT: bteqz $BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst -; lcb: btnez $BB{{[0-9]+}}_{{[0-9]+}} -; lcbn-NOT: btnez $BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst -; lcb: .end btz -; lcbn: .end btz - attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } diff --git 
a/llvm/test/CodeGen/Mips/lcb5.ll b/llvm/test/CodeGen/Mips/lcb5.ll --- a/llvm/test/CodeGen/Mips/lcb5.ll +++ b/llvm/test/CodeGen/Mips/lcb5.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=ci @i = global i32 0, align 4 @@ -6,6 +7,41 @@ ; Function Attrs: nounwind optsize define i32 @x0() #0 { +; ci-LABEL: x0: +; ci: # %bb.0: # %entry +; ci-NEXT: li $2, %hi(i) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: lw $3, %lo(i)($2) +; ci-NEXT: beqz $3, $BB0_2 +; ci-NEXT: # %bb.1: # %if.else +; ci-NEXT: li $3, 1 +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 1004 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: b $BB0_3 # 16 bit inst +; ci-NEXT: $BB0_2: # %if.then +; ci-NEXT: li $3, 0 +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 1000 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: $BB0_3: # %if.end +; ci-NEXT: sw $3, %lo(i)($2) +; ci-NEXT: li $2, 0 +; ci-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %cmp = icmp eq i32 %0, 0 @@ -25,13 +61,48 @@ ret i32 0 } -; ci: .ent x0 -; ci: beqz $3, $BB0_2 -; ci: $BB0_2: -; ci: .end x0 - ; Function Attrs: nounwind optsize define i32 @x1() #0 { +; ci-LABEL: x1: +; ci: # %bb.0: # %entry +; ci-NEXT: li $2, %hi(i) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: lw $3, %lo(i)($2) +; ci-NEXT: bnez $3, $BB1_1 # 16 bit inst +; ci-NEXT: jal $BB1_2 # branch +; ci-NEXT: nop +; ci-NEXT: $BB1_1: # %if.else +; ci-NEXT: li $3, 1 +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 1000004 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; 
ci-NEXT: jal $BB1_3 # branch +; ci-NEXT: nop +; ci-NEXT: .p2align 2 +; ci-NEXT: $BB1_2: # %if.then +; ci-NEXT: li $3, 0 +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 1000000 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: .p2align 2 +; ci-NEXT: $BB1_3: # %if.end +; ci-NEXT: sw $3, %lo(i)($2) +; ci-NEXT: li $2, 0 +; ci-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %cmp = icmp eq i32 %0, 0 @@ -51,15 +122,49 @@ ret i32 0 } -; ci: .ent x1 -; ci: bnez $3, $BB1_1 # 16 bit inst -; ci: jal $BB1_2 # branch -; ci: nop -; ci: $BB1_1: -; ci: .end x1 ; Function Attrs: nounwind optsize define i32 @y0() #0 { +; ci-LABEL: y0: +; ci: # %bb.0: # %entry +; ci-NEXT: li $2, %hi(i) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: lw $2, %lo(i)($2) +; ci-NEXT: beqz $2, $BB2_2 +; ci-NEXT: # %bb.1: # %if.else +; ci-NEXT: li $2, %hi(j) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: li $3, 55 +; ci-NEXT: sw $3, %lo(j)($2) +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 1004 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: b $BB2_3 # 16 bit inst +; ci-NEXT: $BB2_2: # %if.then +; ci-NEXT: li $2, %hi(j) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: li $3, 10 +; ci-NEXT: sw $3, %lo(j)($2) +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 1000 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: $BB2_3: # %if.end +; ci-NEXT: li $2, 0 +; ci-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %cmp = icmp eq i32 %0, 0 @@ -79,12 +184,53 @@ ret i32 0 } -; ci: .ent y0 -; ci: beqz $2, $BB2_2 -; ci: .end y0 - ; Function Attrs: nounwind optsize define i32 @y1() #0 { +; ci-LABEL: y1: +; ci: # %bb.0: # %entry +; ci-NEXT: li $2, %hi(i) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: lw $2, 
%lo(i)($2) +; ci-NEXT: bnez $2, $BB3_1 # 16 bit inst +; ci-NEXT: jal $BB3_2 # branch +; ci-NEXT: nop +; ci-NEXT: $BB3_1: # %if.else +; ci-NEXT: li $2, %hi(j) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: li $3, 55 +; ci-NEXT: sw $3, %lo(j)($2) +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 1000004 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: jal $BB3_3 # branch +; ci-NEXT: nop +; ci-NEXT: .p2align 2 +; ci-NEXT: $BB3_2: # %if.then +; ci-NEXT: li $2, %hi(j) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: li $3, 10 +; ci-NEXT: sw $3, %lo(j)($2) +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 1000000 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: .p2align 2 +; ci-NEXT: $BB3_3: # %if.end +; ci-NEXT: li $2, 0 +; ci-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %cmp = icmp eq i32 %0, 0 @@ -104,15 +250,51 @@ ret i32 0 } -; ci: .ent y1 -; ci: bnez $2, $BB3_1 # 16 bit inst -; ci: jal $BB3_2 # branch -; ci: nop -; ci: $BB3_1: -; ci: .end y1 ; Function Attrs: nounwind optsize define void @z0() #0 { +; ci-LABEL: z0: +; ci: # %bb.0: # %entry +; ci-NEXT: li $2, %hi(j) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: lw $2, %lo(j)($2) +; ci-NEXT: li $3, %hi(i) +; ci-NEXT: sll $3, $3, 16 +; ci-NEXT: lw $3, %lo(i)($3) +; ci-NEXT: cmp $3, $2 +; ci-NEXT: bteqz $BB4_2 +; ci-NEXT: # %bb.1: # %if.else +; ci-NEXT: li $2, %hi(k) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 10004 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: li $3, 2 +; ci-NEXT: sw $3, %lo(k)($2) +; ci-NEXT: jrc $ra +; ci-NEXT: $BB4_2: # %if.then +; ci-NEXT: li $2, %hi(k) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: li $3, 1 +; ci-NEXT: sw $3, %lo(k)($2) +; ci-NEXT: #APP +; 
ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 10000 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %1 = load i32, i32* @j, align 4, !tbaa !1 @@ -133,12 +315,54 @@ ret void } -; ci: .ent z0 -; ci: btnez $BB4_2 -; ci: .end z0 ; Function Attrs: nounwind optsize define void @z1() #0 { +; ci-LABEL: z1: +; ci: # %bb.0: # %entry +; ci-NEXT: li $2, %hi(j) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: lw $2, %lo(j)($2) +; ci-NEXT: li $3, %hi(i) +; ci-NEXT: sll $3, $3, 16 +; ci-NEXT: lw $3, %lo(i)($3) +; ci-NEXT: cmp $3, $2 +; ci-NEXT: btnez $BB5_1 # 16 bit inst +; ci-NEXT: jal $BB5_2 # branch +; ci-NEXT: nop +; ci-NEXT: $BB5_1: # %if.else +; ci-NEXT: li $2, %hi(k) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 10000004 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: li $3, 2 +; ci-NEXT: sw $3, %lo(k)($2) +; ci-NEXT: jrc $ra +; ci-NEXT: .p2align 2 +; ci-NEXT: $BB5_2: # %if.then +; ci-NEXT: li $2, %hi(k) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: li $3, 1 +; ci-NEXT: sw $3, %lo(k)($2) +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 10000000 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %1 = load i32, i32* @j, align 4, !tbaa !1 @@ -159,15 +383,37 @@ ret void } -; ci: .ent z1 -; ci: bteqz $BB5_1 # 16 bit inst -; ci: jal $BB5_2 # branch -; ci: nop -; ci: $BB5_1: -; ci: .end z1 ; Function Attrs: nounwind optsize define void @z3() #0 { +; ci-LABEL: z3: +; ci: # %bb.0: # %entry +; ci-NEXT: li $2, %hi(i) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: lw $4, %lo(i)($2) +; ci-NEXT: li $3, %hi(j) +; ci-NEXT: sll $3, $3, 16 +; ci-NEXT: lw $5, %lo(j)($3) +; 
ci-NEXT: slt $5, $4 +; ci-NEXT: bteqz $BB6_2 +; ci-NEXT: $BB6_1: # %if.then +; ci-NEXT: # =>This Inner Loop Header: Depth=1 +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 10000 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: lw $4, %lo(i)($2) +; ci-NEXT: lw $5, %lo(j)($3) +; ci-NEXT: slt $5, $4 +; ci-NEXT: btnez $BB6_1 +; ci-NEXT: $BB6_2: # %if.end +; ci-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %1 = load i32, i32* @j, align 4, !tbaa !1 @@ -185,12 +431,42 @@ ret void } -; ci: .ent z3 -; ci: bteqz $BB6_2 -; ci: .end z3 - ; Function Attrs: nounwind optsize define void @z4() #0 { +; ci-LABEL: z4: +; ci: # %bb.0: # %entry +; ci-NEXT: li $2, %hi(i) +; ci-NEXT: sll $2, $2, 16 +; ci-NEXT: lw $4, %lo(i)($2) +; ci-NEXT: li $3, %hi(j) +; ci-NEXT: sll $3, $3, 16 +; ci-NEXT: lw $5, %lo(j)($3) +; ci-NEXT: slt $5, $4 +; ci-NEXT: btnez $BB7_1 # 16 bit inst +; ci-NEXT: jal $BB7_2 # branch +; ci-NEXT: nop +; ci-NEXT: .p2align 2 +; ci-NEXT: $BB7_1: # %if.then +; ci-NEXT: # =>This Inner Loop Header: Depth=1 +; ci-NEXT: #APP +; ci-NEXT: .set push +; ci-NEXT: .set at +; ci-NEXT: .set macro +; ci-NEXT: .set reorder +; ci-EMPTY: +; ci-NEXT: .space 10000000 +; ci-EMPTY: +; ci-NEXT: .set pop +; ci-NEXT: #NO_APP +; ci-NEXT: lw $4, %lo(i)($2) +; ci-NEXT: lw $5, %lo(j)($3) +; ci-NEXT: slt $5, $4 +; ci-NEXT: bteqz $BB7_2 # 16 bit inst +; ci-NEXT: jal $BB7_1 # branch +; ci-NEXT: nop +; ci-NEXT: .p2align 2 +; ci-NEXT: $BB7_2: # %if.end +; ci-NEXT: jrc $ra entry: %0 = load i32, i32* @i, align 4, !tbaa !1 %1 = load i32, i32* @j, align 4, !tbaa !1 @@ -208,14 +484,6 @@ ret void } -; ci: .ent z4 -; ci: btnez $BB7_1 # 16 bit inst -; ci: jal $BB7_2 # branch -; ci: nop -; ci: .p2align 2 -; ci: $BB7_1: -; ci: .end z4 - attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" 
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/Mips/longbranch/compact-branches-long-branch.ll b/llvm/test/CodeGen/Mips/longbranch/compact-branches-long-branch.ll --- a/llvm/test/CodeGen/Mips/longbranch/compact-branches-long-branch.ll +++ b/llvm/test/CodeGen/Mips/longbranch/compact-branches-long-branch.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -march=mips -mcpu=mips32r6 -force-mips-long-branch | FileCheck %s ; Check that when MIPS32R6 with the static relocation model with the usage of @@ -9,11 +10,29 @@ declare i32 @g() -; CHECK-LABEL: test1: -; CHECK: bnezc -; CHECK-NEXT: nop - define i32 @test1(i32 %a) { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -24 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: bnezc $4, $BB0_2 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB0_3 +; CHECK-NEXT: $BB0_2: # %cond.false +; CHECK-NEXT: jal g +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 +; CHECK-NEXT: $BB0_3: # %cond.true +; CHECK-NEXT: jal f +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 entry: %0 = icmp eq i32 %a, 0 br i1 %0, label %cond.true, label %cond.false @@ -25,11 +44,30 @@ ret i32 %2 } -; CHECK-LABEL: test2: -; CHECK: bgezc -; CHECK-NEXT: nop define i32 @test2(i32 %a) { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -24 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: bgezc $4, $BB1_2 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB1_3 +; 
CHECK-NEXT: $BB1_2: # %cond.true +; CHECK-NEXT: jal f +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 +; CHECK-NEXT: $BB1_3: # %cond.false +; CHECK-NEXT: jal g +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 entry: %0 = icmp sge i32 %a, 0 br i1 %0, label %cond.true, label %cond.false @@ -41,11 +79,30 @@ ret i32 %2 } -; CHECK-LABEL: test3: -; CHECK: blezc -; CHECK-NEXT: nop define i32 @test3(i32 %a) { +; CHECK-LABEL: test3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -24 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: blezc $4, $BB2_2 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB2_3 +; CHECK-NEXT: $BB2_2: # %cond.true +; CHECK-NEXT: jal f +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 +; CHECK-NEXT: $BB2_3: # %cond.false +; CHECK-NEXT: jal g +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 entry: %0 = icmp sle i32 %a, 0 br i1 %0, label %cond.true, label %cond.false @@ -57,11 +114,30 @@ ret i32 %2 } -; CHECK-LABEL: test4: -; CHECK: bgtzc -; CHECK-NEXT: nop define i32 @test4(i32 %a) { +; CHECK-LABEL: test4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -24 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: bgtzc $4, $BB3_2 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB3_3 +; CHECK-NEXT: $BB3_2: # %cond.true +; CHECK-NEXT: jal f +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 +; CHECK-NEXT: $BB3_3: # %cond.false 
+; CHECK-NEXT: jal g +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 entry: %0 = icmp sgt i32 %a, 0 br i1 %0, label %cond.true, label %cond.false @@ -73,11 +149,29 @@ ret i32 %2 } -; CHECK-LABEL: test5: -; CHECK: bgezc -; CHECK-NEXT: nop - define i32 @test5(i32 %a) { +; CHECK-LABEL: test5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -24 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: bgezc $4, $BB4_2 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB4_3 +; CHECK-NEXT: $BB4_2: # %cond.false +; CHECK-NEXT: jal g +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 +; CHECK-NEXT: $BB4_3: # %cond.true +; CHECK-NEXT: jal f +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 entry: %0 = icmp slt i32 %a, 0 br i1 %0, label %cond.true, label %cond.false @@ -89,11 +183,30 @@ ret i32 %2 } -; CHECK-LABEL: test6: -; CHECK: bnezc -; CHECK-NEXT: nop - define i32 @test6(i32 %a, i32 %b) { +; CHECK-LABEL: test6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -24 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: sltu $1, $5, $4 +; CHECK-NEXT: bnezc $1, $BB5_2 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB5_3 +; CHECK-NEXT: $BB5_2: # %cond.true +; CHECK-NEXT: jal f +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 +; CHECK-NEXT: $BB5_3: # %cond.false +; CHECK-NEXT: jal g +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 entry: %0 = icmp ugt i32 
%a, %b br i1 %0, label %cond.true, label %cond.false @@ -105,11 +218,31 @@ ret i32 %2 } -; CHECK-LABEL: test7: -; CHECK: beqzc -; CHECK-NEXT: nop define i32 @test7(i32 %a, i32 %b) { +; CHECK-LABEL: test7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -24 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: sltu $1, $4, $5 +; CHECK-NEXT: bnezc $1, $BB6_2 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB6_3 +; CHECK-NEXT: $BB6_2: # %cond.false +; CHECK-NEXT: jal g +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 +; CHECK-NEXT: $BB6_3: # %cond.true +; CHECK-NEXT: jal f +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 entry: %0 = icmp uge i32 %a, %b br i1 %0, label %cond.true, label %cond.false @@ -121,11 +254,31 @@ ret i32 %2 } -; CHECK-LABEL: test8: -; CHECK: bnezc -; CHECK-NEXT: nop define i32 @test8(i32 %a, i32 %b) { +; CHECK-LABEL: test8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -24 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: sltu $1, $4, $5 +; CHECK-NEXT: bnezc $1, $BB7_2 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB7_3 +; CHECK-NEXT: $BB7_2: # %cond.true +; CHECK-NEXT: jal f +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 +; CHECK-NEXT: $BB7_3: # %cond.false +; CHECK-NEXT: jal g +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 entry: %0 = icmp ult i32 %a, %b br i1 %0, label %cond.true, label %cond.false @@ -137,11 +290,31 @@ ret i32 %2 } -; CHECK-LABEL: test9: -; CHECK: beqzc -; 
CHECK-NEXT: nop define i32 @test9(i32 %a, i32 %b) { +; CHECK-LABEL: test9: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiu $sp, $sp, -24 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 31, -4 +; CHECK-NEXT: sltu $1, $5, $4 +; CHECK-NEXT: bnezc $1, $BB8_2 +; CHECK-NEXT: nop +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: bc $BB8_3 +; CHECK-NEXT: $BB8_2: # %cond.false +; CHECK-NEXT: jal g +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 +; CHECK-NEXT: $BB8_3: # %cond.true +; CHECK-NEXT: jal f +; CHECK-NEXT: nop +; CHECK-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $sp, $sp, 24 entry: %0 = icmp ule i32 %a, %b br i1 %0, label %cond.true, label %cond.false diff --git a/llvm/test/CodeGen/Mips/seleq.ll b/llvm/test/CodeGen/Mips/seleq.ll --- a/llvm/test/CodeGen/Mips/seleq.ll +++ b/llvm/test/CodeGen/Mips/seleq.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=MIPS16 @t = global i32 10, align 4 @f = global i32 199, align 4 @@ -11,6 +12,74 @@ @z4 = common global i32 0, align 4 define void @calc_seleq() nounwind { +; MIPS16-LABEL: calc_seleq: +; MIPS16: # %bb.0: # %entry +; MIPS16-NEXT: lui $2, %hi(_gp_disp) +; MIPS16-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS16-NEXT: li $2, %hi(_gp_disp) +; MIPS16-NEXT: addiu $3, $pc, %lo(_gp_disp) +; MIPS16-NEXT: sll $2, $2, 16 +; MIPS16-NEXT: addu $2, $3, $2 +; MIPS16-NEXT: lw $4, %got(b)($2) +; MIPS16-NEXT: lw $5, 0($4) +; MIPS16-NEXT: lw $3, %got(a)($2) +; MIPS16-NEXT: lw $6, 0($3) +; MIPS16-NEXT: cmp $6, $5 +; MIPS16-NEXT: bteqz $BB0_2 # 16 bit inst +; MIPS16-NEXT: # %bb.1: # %cond.false +; MIPS16-NEXT: lw $5, 
%got(t)($2) +; MIPS16-NEXT: lw $5, 0($5) +; MIPS16-NEXT: b $BB0_3 # 16 bit inst +; MIPS16-NEXT: $BB0_2: # %cond.true +; MIPS16-NEXT: lw $5, %got(f)($2) +; MIPS16-NEXT: lw $5, 0($5) +; MIPS16-NEXT: $BB0_3: # %cond.end +; MIPS16-NEXT: lw $6, %got(z1)($2) +; MIPS16-NEXT: sw $5, 0($6) +; MIPS16-NEXT: lw $5, 0($3) +; MIPS16-NEXT: lw $4, 0($4) +; MIPS16-NEXT: cmp $4, $5 +; MIPS16-NEXT: bteqz $BB0_5 # 16 bit inst +; MIPS16-NEXT: # %bb.4: # %cond.false3 +; MIPS16-NEXT: lw $4, %got(t)($2) +; MIPS16-NEXT: lw $4, 0($4) +; MIPS16-NEXT: b $BB0_6 # 16 bit inst +; MIPS16-NEXT: $BB0_5: # %cond.true2 +; MIPS16-NEXT: lw $4, %got(f)($2) +; MIPS16-NEXT: lw $4, 0($4) +; MIPS16-NEXT: $BB0_6: # %cond.end4 +; MIPS16-NEXT: lw $5, %got(z2)($2) +; MIPS16-NEXT: sw $4, 0($5) +; MIPS16-NEXT: lw $5, 0($3) +; MIPS16-NEXT: lw $4, %got(c)($2) +; MIPS16-NEXT: lw $6, 0($4) +; MIPS16-NEXT: cmp $6, $5 +; MIPS16-NEXT: bteqz $BB0_8 # 16 bit inst +; MIPS16-NEXT: # %bb.7: # %cond.false8 +; MIPS16-NEXT: lw $5, %got(f)($2) +; MIPS16-NEXT: lw $5, 0($5) +; MIPS16-NEXT: b $BB0_9 # 16 bit inst +; MIPS16-NEXT: $BB0_8: # %cond.true7 +; MIPS16-NEXT: lw $5, %got(t)($2) +; MIPS16-NEXT: lw $5, 0($5) +; MIPS16-NEXT: $BB0_9: # %cond.end9 +; MIPS16-NEXT: lw $6, %got(z3)($2) +; MIPS16-NEXT: sw $5, 0($6) +; MIPS16-NEXT: lw $4, 0($4) +; MIPS16-NEXT: lw $3, 0($3) +; MIPS16-NEXT: cmp $3, $4 +; MIPS16-NEXT: bteqz $BB0_11 # 16 bit inst +; MIPS16-NEXT: # %bb.10: # %cond.false13 +; MIPS16-NEXT: lw $3, %got(f)($2) +; MIPS16-NEXT: lw $3, 0($3) +; MIPS16-NEXT: b $BB0_12 # 16 bit inst +; MIPS16-NEXT: $BB0_11: # %cond.true12 +; MIPS16-NEXT: lw $3, %got(t)($2) +; MIPS16-NEXT: lw $3, 0($3) +; MIPS16-NEXT: $BB0_12: # %cond.end14 +; MIPS16-NEXT: lw $2, %got(z4)($2) +; MIPS16-NEXT: sw $3, 0($2) +; MIPS16-NEXT: jrc $ra entry: %0 = load i32, i32* @a, align 4 %1 = load i32, i32* @b, align 4 @@ -80,16 +149,3 @@ } attributes #0 = { nounwind "target-cpu"="mips32" "target-features"="+o32,+mips32" } - -; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} -; 16: 
btnez $BB{{[0-9]+}}_{{[0-9]}} - -; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} - -; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} - -; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} - diff --git a/llvm/test/CodeGen/Mips/selle.ll b/llvm/test/CodeGen/Mips/selle.ll --- a/llvm/test/CodeGen/Mips/selle.ll +++ b/llvm/test/CodeGen/Mips/selle.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=MIPS16 @t = global i32 10, align 4 @f = global i32 199, align 4 @@ -12,6 +13,74 @@ @.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 define void @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" { +; MIPS16-LABEL: calc_z: +; MIPS16: # %bb.0: # %entry +; MIPS16-NEXT: lui $2, %hi(_gp_disp) +; MIPS16-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS16-NEXT: li $2, %hi(_gp_disp) +; MIPS16-NEXT: addiu $3, $pc, %lo(_gp_disp) +; MIPS16-NEXT: sll $2, $2, 16 +; MIPS16-NEXT: addu $2, $3, $2 +; MIPS16-NEXT: lw $3, %got(a)($2) +; MIPS16-NEXT: lw $5, 0($3) +; MIPS16-NEXT: lw $4, %got(b)($2) +; MIPS16-NEXT: lw $6, 0($4) +; MIPS16-NEXT: slt $6, $5 +; MIPS16-NEXT: bteqz $BB0_2 # 16 bit inst +; MIPS16-NEXT: # %bb.1: # %cond.false +; MIPS16-NEXT: lw $5, %got(f)($2) +; MIPS16-NEXT: lw $5, 0($5) +; MIPS16-NEXT: b $BB0_3 # 16 bit inst +; MIPS16-NEXT: $BB0_2: # %cond.true +; MIPS16-NEXT: lw $5, %got(t)($2) +; MIPS16-NEXT: lw $5, 0($5) +; MIPS16-NEXT: $BB0_3: # %cond.end +; MIPS16-NEXT: lw $6, %got(z1)($2) +; MIPS16-NEXT: sw $5, 0($6) +; MIPS16-NEXT: lw $4, 0($4) +; MIPS16-NEXT: lw $5, 0($3) +; MIPS16-NEXT: slt $5, $4 +; MIPS16-NEXT: bteqz $BB0_5 # 16 bit inst +; MIPS16-NEXT: # %bb.4: # %cond.false3 +; MIPS16-NEXT: lw $4, %got(t)($2) +; MIPS16-NEXT: lw $4, 0($4) +; 
MIPS16-NEXT: b $BB0_6 # 16 bit inst +; MIPS16-NEXT: $BB0_5: # %cond.true2 +; MIPS16-NEXT: lw $4, %got(f)($2) +; MIPS16-NEXT: lw $4, 0($4) +; MIPS16-NEXT: $BB0_6: # %cond.end4 +; MIPS16-NEXT: lw $5, %got(z2)($2) +; MIPS16-NEXT: sw $4, 0($5) +; MIPS16-NEXT: lw $4, %got(c)($2) +; MIPS16-NEXT: lw $5, 0($4) +; MIPS16-NEXT: lw $6, 0($3) +; MIPS16-NEXT: slt $6, $5 +; MIPS16-NEXT: bteqz $BB0_8 # 16 bit inst +; MIPS16-NEXT: # %bb.7: # %cond.false8 +; MIPS16-NEXT: lw $5, %got(f)($2) +; MIPS16-NEXT: lw $5, 0($5) +; MIPS16-NEXT: b $BB0_9 # 16 bit inst +; MIPS16-NEXT: $BB0_8: # %cond.true7 +; MIPS16-NEXT: lw $5, %got(t)($2) +; MIPS16-NEXT: lw $5, 0($5) +; MIPS16-NEXT: $BB0_9: # %cond.end9 +; MIPS16-NEXT: lw $6, %got(z3)($2) +; MIPS16-NEXT: sw $5, 0($6) +; MIPS16-NEXT: lw $3, 0($3) +; MIPS16-NEXT: lw $4, 0($4) +; MIPS16-NEXT: slt $4, $3 +; MIPS16-NEXT: bteqz $BB0_11 # 16 bit inst +; MIPS16-NEXT: # %bb.10: # %cond.false13 +; MIPS16-NEXT: lw $3, %got(f)($2) +; MIPS16-NEXT: lw $3, 0($3) +; MIPS16-NEXT: b $BB0_12 # 16 bit inst +; MIPS16-NEXT: $BB0_11: # %cond.true12 +; MIPS16-NEXT: lw $3, %got(t)($2) +; MIPS16-NEXT: lw $3, 0($3) +; MIPS16-NEXT: $BB0_12: # %cond.end14 +; MIPS16-NEXT: lw $2, %got(z4)($2) +; MIPS16-NEXT: sw $3, 0($2) +; MIPS16-NEXT: jrc $ra entry: %0 = load i32, i32* @a, align 4 %1 = load i32, i32* @b, align 4 @@ -80,17 +149,6 @@ ret void } -; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} - -; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} - -; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} - -; 16: slt ${{[0-9]+}}, ${{[0-9]+}} -; 16: btnez $BB{{[0-9]+}}_{{[0-9]}} attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" } attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" } diff --git a/llvm/test/CodeGen/PowerPC/brcond.ll b/llvm/test/CodeGen/PowerPC/brcond.ll --- a/llvm/test/CodeGen/PowerPC/brcond.ll +++ b/llvm/test/CodeGen/PowerPC/brcond.ll @@ -1,12 
+1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \ ; RUN: -ppc-reduce-cr-logicals=false < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \ ; RUN: -ppc-reduce-cr-logicals=false < %s | FileCheck %s define signext i32 @testi32slt(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { -; CHECK-LABEL: testi32slt -; CHECK: crorc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi32slt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: crorc 20, 2, 6 +; CHECK-NEXT: bc 12, 20, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB0_2: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -19,9 +29,18 @@ } define signext i32 @testi32ult(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { -; CHECK-LABEL: testi32ult -; CHECK: crorc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi32ult: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: crorc 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB1_2: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -34,9 +53,18 @@ } define signext i32 @testi32sle(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { -; CHECK-LABEL: testi32sle -; CHECK: crandc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi32sle: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: crandc 20, 2, 6 +; CHECK-NEXT: bc 4, 20, .LBB2_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB2_2: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -49,9 +77,18 @@ } define signext i32 @testi32ule(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { -; CHECK-LABEL: testi32ule -; CHECK: crandc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi32ule: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: crandc 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB3_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB3_2: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -65,8 +102,17 @@ define signext i32 @testi32eq(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { ; CHECK-LABEL: testi32eq: -; CHECK: crxor [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: crxor 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB4_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB4_2: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -80,8 +126,17 @@ define signext i32 @testi32sge(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { ; CHECK-LABEL: testi32sge: -; CHECK: crandc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 
+; CHECK-NEXT: crandc 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB5_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB5_2: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -95,8 +150,17 @@ define signext i32 @testi32uge(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { ; CHECK-LABEL: testi32uge: -; CHECK: crandc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: crandc 20, 2, 6 +; CHECK-NEXT: bc 4, 20, .LBB6_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB6_2: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -110,8 +174,17 @@ define signext i32 @testi32sgt(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { ; CHECK-LABEL: testi32sgt: -; CHECK: crorc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: crorc 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB7_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB7_2: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -125,8 +198,17 @@ define signext i32 @testi32ugt(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { ; CHECK-LABEL: testi32ugt: -; CHECK: crorc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: crorc 20, 2, 6 +; CHECK-NEXT: bc 12, 20, .LBB8_2 +; 
CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB8_2: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -140,8 +222,17 @@ define signext i32 @testi32ne(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { ; CHECK-LABEL: testi32ne: -; CHECK: creqv [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: cmpw 1, 3, 4 +; CHECK-NEXT: creqv 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB9_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: extsw 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB9_2: # %iffalse +; CHECK-NEXT: extsw 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i32 %c3, %c4 %cmp3tmp = icmp eq i32 %c1, %c2 @@ -154,9 +245,18 @@ } define i64 @testi64slt(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64slt -; CHECK: crorc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64slt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: crorc 20, 2, 6 +; CHECK-NEXT: bc 12, 20, .LBB10_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB10_2: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -169,9 +269,18 @@ } define i64 @testi64ult(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64ult -; CHECK: crorc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64ult: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: crorc 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB11_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB11_2: # %iffalse +; CHECK-NEXT: mr 3, 8 
+; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -184,9 +293,18 @@ } define i64 @testi64sle(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64sle -; CHECK: crandc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64sle: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: crandc 20, 2, 6 +; CHECK-NEXT: bc 4, 20, .LBB12_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB12_2: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -199,9 +317,18 @@ } define i64 @testi64ule(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64ule -; CHECK: crandc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64ule: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: crandc 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB13_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB13_2: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -214,9 +341,18 @@ } define i64 @testi64eq(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64eq -; CHECK: crxor [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64eq: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: crxor 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB14_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB14_2: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -229,9 +365,18 @@ } define i64 @testi64sge(i64 %c1, i64 %c2, i64 %c3, i64 %c4, 
i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64sge -; CHECK: crandc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64sge: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: crandc 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB15_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB15_2: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -244,9 +389,18 @@ } define i64 @testi64uge(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64uge -; CHECK: crandc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64uge: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: crandc 20, 2, 6 +; CHECK-NEXT: bc 4, 20, .LBB16_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB16_2: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -259,9 +413,18 @@ } define i64 @testi64sgt(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64sgt -; CHECK: crorc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64sgt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: crorc 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB17_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB17_2: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -274,9 +437,18 @@ } define i64 @testi64ugt(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64ugt -; CHECK: crorc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64ugt: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: crorc 20, 2, 6 +; CHECK-NEXT: bc 12, 20, .LBB18_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB18_2: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -289,9 +461,18 @@ } define i64 @testi64ne(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 { -; CHECK-LABEL: testi64ne -; CHECK: creqv [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testi64ne: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: creqv 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB19_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: mr 3, 7 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB19_2: # %iffalse +; CHECK-NEXT: mr 3, 8 +; CHECK-NEXT: blr entry: %cmp1 = icmp eq i64 %c3, %c4 %cmp3tmp = icmp eq i64 %c1, %c2 @@ -304,9 +485,18 @@ } define float @testfloatslt(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloatslt -; CHECK: crorc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloatslt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crorc 20, 2, 6 +; CHECK-NEXT: bc 12, 20, .LBB20_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB20_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -319,9 +509,18 @@ } define float @testfloatult(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloatult -; CHECK: crorc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloatult: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crorc 20, 6, 2 +; 
CHECK-NEXT: bc 12, 20, .LBB21_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB21_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -334,9 +533,18 @@ } define float @testfloatsle(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloatsle -; CHECK: crandc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloatsle: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crandc 20, 2, 6 +; CHECK-NEXT: bc 4, 20, .LBB22_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB22_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -349,9 +557,18 @@ } define float @testfloatule(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloatule -; CHECK: crandc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloatule: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crandc 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB23_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB23_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -364,9 +581,18 @@ } define float @testfloateq(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloateq -; CHECK: crxor [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloateq: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crxor 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB24_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; 
CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB24_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -379,9 +605,18 @@ } define float @testfloatsge(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloatsge -; CHECK: crandc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloatsge: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crandc 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB25_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB25_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -394,9 +629,18 @@ } define float @testfloatuge(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloatuge -; CHECK: crandc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloatuge: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crandc 20, 2, 6 +; CHECK-NEXT: bc 4, 20, .LBB26_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB26_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -409,9 +653,18 @@ } define float @testfloatsgt(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloatsgt -; CHECK: crorc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloatsgt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crorc 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB27_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB27_2: # 
%iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -424,9 +677,18 @@ } define float @testfloatugt(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloatugt -; CHECK: crorc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloatugt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crorc 20, 2, 6 +; CHECK-NEXT: bc 12, 20, .LBB28_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB28_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -439,9 +701,18 @@ } define float @testfloatne(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 { -; CHECK-LABEL: testfloatne -; CHECK: creqv [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testfloatne: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: creqv 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB29_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB29_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq float %c3, %c4 %cmp3tmp = fcmp oeq float %c1, %c2 @@ -454,9 +725,18 @@ } define double @testdoubleslt(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { -; CHECK-LABEL: testdoubleslt -; CHECK: crorc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testdoubleslt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crorc 20, 2, 6 +; CHECK-NEXT: bc 12, 20, .LBB30_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB30_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: 
%cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 @@ -470,8 +750,17 @@ define double @testdoubleult(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { ; CHECK-LABEL: testdoubleult: -; CHECK: crorc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crorc 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB31_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB31_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 @@ -484,9 +773,18 @@ } define double @testdoublesle(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { -; CHECK-LABEL: testdoublesle -; CHECK: crandc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testdoublesle: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crandc 20, 2, 6 +; CHECK-NEXT: bc 4, 20, .LBB32_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB32_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 @@ -500,8 +798,17 @@ define double @testdoubleule(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { ; CHECK-LABEL: testdoubleule: -; CHECK: crandc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crandc 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB33_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB33_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 @@ -514,9 +821,18 @@ } 
define double @testdoubleeq(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { -; CHECK-LABEL: testdoubleeq -; CHECK: crxor [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testdoubleeq: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crxor 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB34_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB34_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 @@ -529,9 +845,18 @@ } define double @testdoublesge(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { -; CHECK-LABEL: testdoublesge -; CHECK: crandc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testdoublesge: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crandc 20, 6, 2 +; CHECK-NEXT: bc 4, 20, .LBB35_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB35_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 @@ -544,9 +869,18 @@ } define double @testdoubleuge(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { -; CHECK-LABEL: testdoubleuge -; CHECK: crandc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testdoubleuge: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crandc 20, 2, 6 +; CHECK-NEXT: bc 4, 20, .LBB36_2 +; CHECK-NEXT: # %bb.1: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB36_2: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 @@ -560,8 +894,17 @@ define double @testdoublesgt(double 
%c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { ; CHECK-LABEL: testdoublesgt: -; CHECK: crorc [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crorc 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB37_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB37_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 @@ -574,9 +917,18 @@ } define double @testdoubleugt(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { -; CHECK-LABEL: testdoubleugt -; CHECK: crorc [[REG:[0-9]+]], 2, 6 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testdoubleugt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: crorc 20, 2, 6 +; CHECK-NEXT: bc 12, 20, .LBB38_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB38_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 @@ -589,9 +941,18 @@ } define double @testdoublene(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 { -; CHECK-LABEL: testdoublene -; CHECK: creqv [[REG:[0-9]+]], 6, 2 -; CHECK: bc 12, [[REG]], {{\.[a-zA-Z0-9_]+}} +; CHECK-LABEL: testdoublene: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcmpu 0, 3, 4 +; CHECK-NEXT: fcmpu 1, 1, 2 +; CHECK-NEXT: creqv 20, 6, 2 +; CHECK-NEXT: bc 12, 20, .LBB39_2 +; CHECK-NEXT: # %bb.1: # %iftrue +; CHECK-NEXT: fmr 1, 5 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB39_2: # %iffalse +; CHECK-NEXT: fmr 1, 6 +; CHECK-NEXT: blr entry: %cmp1 = fcmp oeq double %c3, %c4 %cmp3tmp = fcmp oeq double %c1, %c2 diff --git a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll 
b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll --- a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll +++ b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll @@ -43,12 +43,12 @@ ; CHECK-NEXT: ld 3, 8(3) ; CHECK-NEXT: ld 4, 8(4) ; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: beq 0, .LBB1_3 ; CHECK-NEXT: .LBB1_2: # %res_block ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: .LBB1_3: # %endblock -; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB1_3: +; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 16) %not.tobool = icmp ne i32 %call, 0 @@ -73,12 +73,12 @@ ; CHECK-NEXT: lbz 3, 6(3) ; CHECK-NEXT: lbz 4, 6(4) ; CHECK-NEXT: cmplw 3, 4 -; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: beq 0, .LBB2_4 ; CHECK-NEXT: .LBB2_3: # %res_block ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: .LBB2_4: # %endblock -; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 7) %not.lnot = icmp ne i32 %call, 0 @@ -136,14 +136,16 @@ ; CHECK-NEXT: sldi 4, 4, 32 ; CHECK-NEXT: ori 4, 4, 2 ; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: beq 0, .LBB6_3 +; CHECK-NEXT: beq 0, .LBB6_4 ; CHECK-NEXT: .LBB6_2: # %res_block ; CHECK-NEXT: li 3, 1 ; CHECK-NEXT: .LBB6_3: # %endblock ; CHECK-NEXT: cntlzw 3, 3 ; CHECK-NEXT: srwi 3, 3, 5 ; CHECK-NEXT: blr +; CHECK-NEXT: .LBB6_4: +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: b .LBB6_3 %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* %X, i64 16) %not.tobool = icmp eq i32 %call, 0 %cond = zext i1 %not.tobool to i32 diff --git a/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll b/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll --- a/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll +++ b/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll @@ -17,24 +17,28 @@ 
define dso_local i1 @t(%class.A* %this, i32 %color, i32 %vertex) local_unnamed_addr { ; CHECK-P9-LABEL: t: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: li r5, 1 -; CHECK-P9-NEXT: bc 12, 4*cr5+lt, .LBB0_4 +; CHECK-P9-NEXT: bc 12, 4*cr5+lt, .LBB0_3 ; CHECK-P9-NEXT: # %bb.1: # %land.lhs.true -; CHECK-P9-NEXT: bc 12, 4*cr5+lt, .LBB0_5 -; CHECK-P9-NEXT: .LBB0_2: # %for.inc +; CHECK-P9-NEXT: li r4, 1 +; CHECK-P9-NEXT: bc 4, 4*cr5+lt, .LBB0_4 +; CHECK-P9-NEXT: # %bb.2: # %cleanup16 +; CHECK-P9-NEXT: mr r3, r4 +; CHECK-P9-NEXT: blr +; CHECK-P9-NEXT: .LBB0_3: # %lor.lhs.false +; CHECK-P9-NEXT: cmplwi r4, 0 +; CHECK-P9-NEXT: beq cr0, .LBB0_6 +; CHECK-P9-NEXT: .LBB0_4: # %for.inc ; CHECK-P9-NEXT: lhz r3, 5308(r3) ; CHECK-P9-NEXT: cmplwi r3, 2 -; CHECK-P9-NEXT: bge- cr0, .LBB0_6 -; CHECK-P9-NEXT: # %bb.3: # %land.lhs.true.1 +; CHECK-P9-NEXT: bge- cr0, .LBB0_7 +; CHECK-P9-NEXT: # %bb.5: # %land.lhs.true.1 ; CHECK-P9-NEXT: li r3, 0 ; CHECK-P9-NEXT: blr -; CHECK-P9-NEXT: .LBB0_4: # %lor.lhs.false -; CHECK-P9-NEXT: cmplwi r4, 0 -; CHECK-P9-NEXT: bne cr0, .LBB0_2 -; CHECK-P9-NEXT: .LBB0_5: # %cleanup16 -; CHECK-P9-NEXT: mr r3, r5 +; CHECK-P9-NEXT: .LBB0_6: +; CHECK-P9-NEXT: li r4, 1 +; CHECK-P9-NEXT: mr r3, r4 ; CHECK-P9-NEXT: blr -; CHECK-P9-NEXT: .LBB0_6: # %lor.lhs.false.1 +; CHECK-P9-NEXT: .LBB0_7: # %lor.lhs.false.1 entry: br i1 undef, label %land.lhs.true, label %lor.lhs.false diff --git a/llvm/test/CodeGen/RISCV/branch.ll b/llvm/test/CodeGen/RISCV/branch.ll --- a/llvm/test/CodeGen/RISCV/branch.ll +++ b/llvm/test/CodeGen/RISCV/branch.ll @@ -6,41 +6,42 @@ ; RV32I-LABEL: foo: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: beq a3, a0, .LBB0_12 +; RV32I-NEXT: beq a3, a0, .LBB0_2 ; RV32I-NEXT: # %bb.1: # %test2 ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: bne a3, a0, .LBB0_12 -; RV32I-NEXT: # %bb.2: # %test3 +; RV32I-NEXT: beq a3, a0, .LBB0_3 +; RV32I-NEXT: .LBB0_2: # %end +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB0_3: # %test3 ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: 
blt a3, a0, .LBB0_12 -; RV32I-NEXT: # %bb.3: # %test4 +; RV32I-NEXT: blt a3, a0, .LBB0_2 +; RV32I-NEXT: # %bb.4: # %test4 ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: bge a3, a0, .LBB0_12 -; RV32I-NEXT: # %bb.4: # %test5 +; RV32I-NEXT: bge a3, a0, .LBB0_2 +; RV32I-NEXT: # %bb.5: # %test5 ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: bltu a3, a0, .LBB0_12 -; RV32I-NEXT: # %bb.5: # %test6 +; RV32I-NEXT: bltu a3, a0, .LBB0_2 +; RV32I-NEXT: # %bb.6: # %test6 ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: bgeu a3, a0, .LBB0_12 -; RV32I-NEXT: # %bb.6: # %test7 +; RV32I-NEXT: bgeu a3, a0, .LBB0_2 +; RV32I-NEXT: # %bb.7: # %test7 ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: blt a0, a3, .LBB0_12 -; RV32I-NEXT: # %bb.7: # %test8 +; RV32I-NEXT: blt a0, a3, .LBB0_2 +; RV32I-NEXT: # %bb.8: # %test8 ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: bge a0, a3, .LBB0_12 -; RV32I-NEXT: # %bb.8: # %test9 +; RV32I-NEXT: bge a0, a3, .LBB0_2 +; RV32I-NEXT: # %bb.9: # %test9 ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: bltu a0, a3, .LBB0_12 -; RV32I-NEXT: # %bb.9: # %test10 +; RV32I-NEXT: bltu a0, a3, .LBB0_2 +; RV32I-NEXT: # %bb.10: # %test10 ; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: bgeu a0, a3, .LBB0_12 -; RV32I-NEXT: # %bb.10: # %test11 +; RV32I-NEXT: bgeu a0, a3, .LBB0_2 +; RV32I-NEXT: # %bb.11: # %test11 ; RV32I-NEXT: lw a0, 0(a1) ; RV32I-NEXT: andi a0, a2, 1 -; RV32I-NEXT: bnez a0, .LBB0_12 -; RV32I-NEXT: # %bb.11: # %test12 +; RV32I-NEXT: bnez a0, .LBB0_2 +; RV32I-NEXT: # %bb.12: # %test12 ; RV32I-NEXT: lw a0, 0(a1) -; RV32I-NEXT: .LBB0_12: # %end ; RV32I-NEXT: ret %val1 = load volatile i32, i32* %b %tst1 = icmp eq i32 %val1, %a diff --git a/llvm/test/CodeGen/RISCV/rv64m-w-insts-legalization.ll b/llvm/test/CodeGen/RISCV/rv64m-w-insts-legalization.ll --- a/llvm/test/CodeGen/RISCV/rv64m-w-insts-legalization.ll +++ b/llvm/test/CodeGen/RISCV/rv64m-w-insts-legalization.ll @@ -5,15 +5,13 @@ ; CHECK-LABEL: mulw: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi a2, zero, 1 -; CHECK-NEXT: bge a0, a1, .LBB0_3 -; 
CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: addi a2, zero, 1 -; CHECK-NEXT: .LBB0_2: # %for.body +; CHECK-NEXT: bge a0, a1, .LBB0_2 +; CHECK-NEXT: .LBB0_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: mulw a2, a0, a2 ; CHECK-NEXT: addiw a0, a0, 1 -; CHECK-NEXT: blt a0, a1, .LBB0_2 -; CHECK-NEXT: .LBB0_3: # %for.cond.cleanup +; CHECK-NEXT: blt a0, a1, .LBB0_1 +; CHECK-NEXT: .LBB0_2: # %for.cond.cleanup ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-37.ll b/llvm/test/CodeGen/SystemZ/int-cmp-37.ll --- a/llvm/test/CodeGen/SystemZ/int-cmp-37.ll +++ b/llvm/test/CodeGen/SystemZ/int-cmp-37.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; Test 32-bit comparisons in which the second operand is zero-extended ; from a PC-relative i16. ; @@ -9,9 +10,16 @@ ; Check unsigned comparison. define i32 @f1(i32 %src1) { ; CHECK-LABEL: f1: -; CHECK: clhrl %r2, g -; CHECK-NEXT: jl -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clhrl %r2, g +; CHECK-NEXT: jhe .LBB0_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB0_2: # %mulb +; CHECK-NEXT: msr %r2, %r2 +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src2 = zext i16 %val to i32 @@ -29,8 +37,16 @@ ; Check signed comparison. define i32 @f2(i32 %src1) { ; CHECK-LABEL: f2: -; CHECK-NOT: clhrl -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: llhrl %r0, g +; CHECK-NEXT: crjhe %r2, %r0, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB1_2: # %mulb +; CHECK-NEXT: msr %r2, %r2 +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src2 = zext i16 %val to i32 @@ -48,9 +64,14 @@ ; Check equality. 
define i32 @f3(i32 %src1) { ; CHECK-LABEL: f3: -; CHECK: clhrl %r2, g -; CHECK-NEXT: je -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clhrl %r2, g +; CHECK-NEXT: je .LBB2_2 +; CHECK-NEXT: # %bb.1: # %mulb +; CHECK-NEXT: msr %r2, %r2 +; CHECK-NEXT: .LBB2_2: # %exit +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src2 = zext i16 %val to i32 @@ -68,9 +89,16 @@ ; Check inequality. define i32 @f4(i32 %src1) { ; CHECK-LABEL: f4: -; CHECK: clhrl %r2, g -; CHECK-NEXT: jlh -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clhrl %r2, g +; CHECK-NEXT: je .LBB3_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB3_2: # %mulb +; CHECK-NEXT: msr %r2, %r2 +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src2 = zext i16 %val to i32 @@ -88,10 +116,17 @@ ; Repeat f1 with an unaligned address. define i32 @f5(i32 %src1) { ; CHECK-LABEL: f5: -; CHECK: lgrl [[REG:%r[0-5]]], h@GOT -; CHECK: llh [[VAL:%r[0-5]]], 0([[REG]]) -; CHECK: clrjl %r2, [[VAL]], -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lgrl %r1, h@GOT +; CHECK-NEXT: llh %r0, 0(%r1) +; CHECK-NEXT: clrjhe %r2, %r0, .LBB4_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB4_2: # %mulb +; CHECK-NEXT: msr %r2, %r2 +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@h, align 1 %src2 = zext i16 %val to i32 @@ -109,9 +144,16 @@ ; Check the comparison can be reversed if that allows CLHRL to be used. 
define i32 @f6(i32 %src2) { ; CHECK-LABEL: f6: -; CHECK: clhrl %r2, g -; CHECK-NEXT: jh {{\.L.*}} -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clhrl %r2, g +; CHECK-NEXT: jle .LBB5_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB5_2: # %mulb +; CHECK-NEXT: msr %r2, %r2 +; CHECK-NEXT: ahi %r2, 1 +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src1 = zext i16 %val to i32 diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-40.ll b/llvm/test/CodeGen/SystemZ/int-cmp-40.ll --- a/llvm/test/CodeGen/SystemZ/int-cmp-40.ll +++ b/llvm/test/CodeGen/SystemZ/int-cmp-40.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; Test 64-bit comparisons in which the second operand is zero-extended ; from a PC-relative i16. ; @@ -9,9 +10,16 @@ ; Check unsigned comparison. define i64 @f1(i64 %src1) { ; CHECK-LABEL: f1: -; CHECK: clghrl %r2, g -; CHECK-NEXT: jl -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clghrl %r2, g +; CHECK-NEXT: jhe .LBB0_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB0_2: # %mulb +; CHECK-NEXT: msgr %r2, %r2 +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src2 = zext i16 %val to i64 @@ -29,8 +37,16 @@ ; Check signed comparison. define i64 @f2(i64 %src1) { ; CHECK-LABEL: f2: -; CHECK-NOT: clghrl -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: llghrl %r0, g +; CHECK-NEXT: cgrjhe %r2, %r0, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB1_2: # %mulb +; CHECK-NEXT: msgr %r2, %r2 +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src2 = zext i16 %val to i64 @@ -48,9 +64,14 @@ ; Check equality. 
define i64 @f3(i64 %src1) { ; CHECK-LABEL: f3: -; CHECK: clghrl %r2, g -; CHECK-NEXT: je -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clghrl %r2, g +; CHECK-NEXT: je .LBB2_2 +; CHECK-NEXT: # %bb.1: # %mulb +; CHECK-NEXT: msgr %r2, %r2 +; CHECK-NEXT: .LBB2_2: # %exit +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src2 = zext i16 %val to i64 @@ -68,9 +89,16 @@ ; Check inequality. define i64 @f4(i64 %src1) { ; CHECK-LABEL: f4: -; CHECK: clghrl %r2, g -; CHECK-NEXT: jlh -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clghrl %r2, g +; CHECK-NEXT: je .LBB3_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB3_2: # %mulb +; CHECK-NEXT: msgr %r2, %r2 +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src2 = zext i16 %val to i64 @@ -88,10 +116,17 @@ ; Repeat f1 with an unaligned address. define i64 @f5(i64 %src1) { ; CHECK-LABEL: f5: -; CHECK: lgrl [[REG:%r[0-5]]], h@GOT -; CHECK: llgh [[VAL:%r[0-5]]], 0([[REG]]) -; CHECK: clgrjl %r2, [[VAL]], -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lgrl %r1, h@GOT +; CHECK-NEXT: llgh %r0, 0(%r1) +; CHECK-NEXT: clgrjhe %r2, %r0, .LBB4_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB4_2: # %mulb +; CHECK-NEXT: msgr %r2, %r2 +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@h, align 1 %src2 = zext i16 %val to i64 @@ -109,9 +144,16 @@ ; Check the comparison can be reversed if that allows CLGHRL to be used. 
define i64 @f6(i64 %src2) { ; CHECK-LABEL: f6: -; CHECK: clghrl %r2, g -; CHECK-NEXT: jh {{\.L.*}} -; CHECK: br %r14 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: clghrl %r2, g +; CHECK-NEXT: jle .LBB5_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 +; CHECK-NEXT: .LBB5_2: # %mulb +; CHECK-NEXT: msgr %r2, %r2 +; CHECK-NEXT: la %r2, 1(%r2) +; CHECK-NEXT: br %r14 entry: %val = load i16, i16 *@g %src1 = zext i16 %val to i64 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll @@ -903,8 +903,7 @@ ; CHECK-NEXT: le lr, .LBB4_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r12, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB4_8 ; CHECK-NEXT: .LBB4_6: @ %for.body.preheader11 ; CHECK-NEXT: sub.w lr, r3, r12 ; CHECK-NEXT: add.w r0, r0, r12, lsl #2 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll @@ -17,7 +17,7 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input, i16* nocapture %Output, i16 signext %Size, i16 signext %N, i16 signext %Scale) local_unnamed_addr { ; ENABLED-LABEL: varying_outer_2d_reduction: ; ENABLED: @ %bb.0: @ %entry -; ENABLED-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} +; ENABLED-NEXT: push.w {r4, r5, r6, r7, r9, r10, r11, lr} ; ENABLED-NEXT: sub sp, #4 ; ENABLED-NEXT: cmp r3, #1 ; ENABLED-NEXT: str r0, [sp] @ 4-byte Spill @@ -54,7 +54,7 @@ ; ENABLED-NEXT: subs r0, #4 ; ENABLED-NEXT: subs r4, r2, r6 ; ENABLED-NEXT: vmov.i32 q0, #0x0 -; ENABLED-NEXT: add.w r8, r7, 
r0, lsr #2 +; ENABLED-NEXT: add.w r11, r7, r0, lsr #2 ; ENABLED-NEXT: mov r7, r10 ; ENABLED-NEXT: dlstp.32 lr, r4 ; ENABLED-NEXT: ldr r0, [sp] @ 4-byte Reload @@ -63,9 +63,9 @@ ; ENABLED-NEXT: @ => This Inner Loop Header: Depth=2 ; ENABLED-NEXT: vldrh.s32 q1, [r0], #8 ; ENABLED-NEXT: vldrh.s32 q2, [r7], #8 -; ENABLED-NEXT: mov lr, r8 +; ENABLED-NEXT: mov lr, r11 ; ENABLED-NEXT: vmul.i32 q1, q2, q1 -; ENABLED-NEXT: sub.w r8, r8, #1 +; ENABLED-NEXT: sub.w r11, r11, #1 ; ENABLED-NEXT: vshl.s32 q1, r5 ; ENABLED-NEXT: vadd.i32 q0, q1, q0 ; ENABLED-NEXT: letp lr, .LBB0_6 @@ -75,11 +75,11 @@ ; ENABLED-NEXT: b .LBB0_3 ; ENABLED-NEXT: .LBB0_8: @ %for.end17 ; ENABLED-NEXT: add sp, #4 -; ENABLED-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} +; ENABLED-NEXT: pop.w {r4, r5, r6, r7, r9, r10, r11, pc} ; ; NOREDUCTIONS-LABEL: varying_outer_2d_reduction: ; NOREDUCTIONS: @ %bb.0: @ %entry -; NOREDUCTIONS-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} +; NOREDUCTIONS-NEXT: push.w {r4, r5, r6, r7, r9, r10, r11, lr} ; NOREDUCTIONS-NEXT: sub sp, #4 ; NOREDUCTIONS-NEXT: cmp r3, #1 ; NOREDUCTIONS-NEXT: str r0, [sp] @ 4-byte Spill @@ -116,7 +116,7 @@ ; NOREDUCTIONS-NEXT: subs r0, #4 ; NOREDUCTIONS-NEXT: subs r4, r2, r6 ; NOREDUCTIONS-NEXT: vmov.i32 q0, #0x0 -; NOREDUCTIONS-NEXT: add.w r8, r7, r0, lsr #2 +; NOREDUCTIONS-NEXT: add.w r11, r7, r0, lsr #2 ; NOREDUCTIONS-NEXT: mov r7, r10 ; NOREDUCTIONS-NEXT: dlstp.32 lr, r4 ; NOREDUCTIONS-NEXT: ldr r0, [sp] @ 4-byte Reload @@ -125,9 +125,9 @@ ; NOREDUCTIONS-NEXT: @ => This Inner Loop Header: Depth=2 ; NOREDUCTIONS-NEXT: vldrh.s32 q1, [r0], #8 ; NOREDUCTIONS-NEXT: vldrh.s32 q2, [r7], #8 -; NOREDUCTIONS-NEXT: mov lr, r8 +; NOREDUCTIONS-NEXT: mov lr, r11 ; NOREDUCTIONS-NEXT: vmul.i32 q1, q2, q1 -; NOREDUCTIONS-NEXT: sub.w r8, r8, #1 +; NOREDUCTIONS-NEXT: sub.w r11, r11, #1 ; NOREDUCTIONS-NEXT: vshl.s32 q1, r5 ; NOREDUCTIONS-NEXT: vadd.i32 q0, q1, q0 ; NOREDUCTIONS-NEXT: letp lr, .LBB0_6 @@ -137,8 +137,7 @@ ; NOREDUCTIONS-NEXT: b .LBB0_3 ; 
NOREDUCTIONS-NEXT: .LBB0_8: @ %for.end17 ; NOREDUCTIONS-NEXT: add sp, #4 -; NOREDUCTIONS-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} -; +; NOREDUCTIONS-NEXT: pop.w {r4, r5, r6, r7, r9, r10, r11, pc} entry: %conv = sext i16 %N to i32 %cmp36 = icmp sgt i16 %N, 0 diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll --- a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll @@ -302,8 +302,7 @@ ; CHECK-NEXT: le lr, .LBB2_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r12, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB2_8 ; CHECK-NEXT: .LBB2_6: @ %for.body.preheader12 ; CHECK-NEXT: sub.w lr, r3, r12 ; CHECK-NEXT: add.w r0, r0, r12, lsl #2 diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll --- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll @@ -36,8 +36,7 @@ ; CHECK-NEXT: le lr, .LBB0_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r12, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB0_8 ; CHECK-NEXT: .LBB0_6: @ %for.body.preheader12 ; CHECK-NEXT: sub.w lr, r3, r12 ; CHECK-NEXT: add.w r0, r0, r12, lsl #2 @@ -220,7 +219,7 @@ ; CHECK-NEXT: ldr r3, [sp, #64] ; CHECK-NEXT: mov.w r11, #0 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldr.w r9, [sp, #56] +; CHECK-NEXT: ldr r4, [sp, #56] ; CHECK-NEXT: add.w r0, r1, r3, lsl #1 ; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: adds r0, r1, r3 @@ -229,56 +228,56 @@ ; CHECK-NEXT: add r0, r1 ; CHECK-NEXT: str r0, [sp] @ 4-byte Spill ; CHECK-NEXT: adds r0, r3, #7 -; CHECK-NEXT: lsrs r0, r0, #3 +; CHECK-NEXT: lsr.w r9, r0, #3 ; CHECK-NEXT: b .LBB2_5 ; CHECK-NEXT: .LBB2_3: @ in Loop: Header=BB2_5 Depth=1 -; CHECK-NEXT: mov r8, r12 -; CHECK-NEXT: mov r10, r12 -; CHECK-NEXT: mov r6, r12 +; 
CHECK-NEXT: mov r8, r10 +; CHECK-NEXT: mov r12, r10 +; CHECK-NEXT: mov r6, r10 ; CHECK-NEXT: .LBB2_4: @ %for.cond.cleanup23 ; CHECK-NEXT: @ in Loop: Header=BB2_5 Depth=1 -; CHECK-NEXT: ldr r3, [sp, #72] -; CHECK-NEXT: add.w r1, r10, r8 -; CHECK-NEXT: add r1, r6 -; CHECK-NEXT: add r1, r12 -; CHECK-NEXT: strb.w r1, [r3, r11] +; CHECK-NEXT: ldr r1, [sp, #72] +; CHECK-NEXT: add.w r0, r12, r8 +; CHECK-NEXT: add r0, r6 +; CHECK-NEXT: add r0, r10 +; CHECK-NEXT: strb.w r0, [r1, r11] ; CHECK-NEXT: add.w r11, r11, #1 ; CHECK-NEXT: cmp r11, r2 ; CHECK-NEXT: beq .LBB2_8 ; CHECK-NEXT: .LBB2_5: @ %for.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB2_7 Depth 2 -; CHECK-NEXT: ldr r1, [sp, #68] -; CHECK-NEXT: subs.w lr, r0, r0 -; CHECK-NEXT: ldr.w r12, [r1, r11, lsl #2] +; CHECK-NEXT: ldr r0, [sp, #68] +; CHECK-NEXT: subs.w lr, r9, r9 +; CHECK-NEXT: ldr.w r10, [r0, r11, lsl #2] ; CHECK-NEXT: ble .LBB2_3 ; CHECK-NEXT: @ %bb.6: @ %for.body24.preheader ; CHECK-NEXT: @ in Loop: Header=BB2_5 Depth=1 -; CHECK-NEXT: ldr r3, [sp, #64] -; CHECK-NEXT: mov r6, r12 -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #64] +; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r10, r12 -; CHECK-NEXT: mla r7, r11, r3, r1 +; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mov r12, r10 +; CHECK-NEXT: mla r7, r11, r1, r0 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldrd r4, r3, [sp] @ 8-byte Folded Reload -; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: ldrd r5, r0, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: mov r8, r10 ; CHECK-NEXT: .LBB2_7: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB2_5 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vldrb.s16 q0, [r4], #8 -; CHECK-NEXT: vadd.i16 q1, q0, r9 +; CHECK-NEXT: vldrb.s16 q0, [r5], #8 +; CHECK-NEXT: vadd.i16 q1, q0, r4 ; CHECK-NEXT: 
vldrb.s16 q0, [r7], #8 -; CHECK-NEXT: vmlava.s16 r12, q0, q1 -; CHECK-NEXT: vldrb.s16 q1, [r5], #8 -; CHECK-NEXT: vadd.i16 q1, q1, r9 -; CHECK-NEXT: vmlava.s16 r6, q0, q1 -; CHECK-NEXT: vldrb.s16 q1, [r3], #8 -; CHECK-NEXT: vadd.i16 q1, q1, r9 ; CHECK-NEXT: vmlava.s16 r10, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r3], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r4 +; CHECK-NEXT: vmlava.s16 r6, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r0], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r4 +; CHECK-NEXT: vmlava.s16 r12, q0, q1 ; CHECK-NEXT: vldrb.s16 q1, [r1], #8 -; CHECK-NEXT: vadd.i16 q1, q1, r9 +; CHECK-NEXT: vadd.i16 q1, q1, r4 ; CHECK-NEXT: vmlava.s16 r8, q0, q1 ; CHECK-NEXT: le lr, .LBB2_7 ; CHECK-NEXT: b .LBB2_4 @@ -403,7 +402,7 @@ ; CHECK-NEXT: ldr r3, [sp, #64] ; CHECK-NEXT: mov.w r11, #0 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldr.w r9, [sp, #56] +; CHECK-NEXT: ldr r4, [sp, #56] ; CHECK-NEXT: add.w r0, r1, r3, lsl #1 ; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: adds r0, r1, r3 @@ -412,55 +411,55 @@ ; CHECK-NEXT: add r0, r1 ; CHECK-NEXT: str r0, [sp] @ 4-byte Spill ; CHECK-NEXT: adds r0, r3, #7 -; CHECK-NEXT: lsrs r0, r0, #3 +; CHECK-NEXT: lsr.w r9, r0, #3 ; CHECK-NEXT: .LBB3_3: @ %for.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB3_5 Depth 2 -; CHECK-NEXT: ldr r1, [sp, #68] -; CHECK-NEXT: subs.w lr, r0, r0 -; CHECK-NEXT: ldr.w r12, [r1, r11, lsl #2] +; CHECK-NEXT: ldr r0, [sp, #68] +; CHECK-NEXT: subs.w lr, r9, r9 +; CHECK-NEXT: ldr.w r10, [r0, r11, lsl #2] ; CHECK-NEXT: ble .LBB3_6 ; CHECK-NEXT: @ %bb.4: @ %for.body24.preheader ; CHECK-NEXT: @ in Loop: Header=BB3_3 Depth=1 -; CHECK-NEXT: ldr r3, [sp, #64] -; CHECK-NEXT: mov r6, r12 -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #64] +; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: dls lr, lr -; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r10, r12 -; CHECK-NEXT: mla r7, r11, 
r3, r1 +; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mov r12, r10 +; CHECK-NEXT: mla r7, r11, r1, r0 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldrd r4, r3, [sp] @ 8-byte Folded Reload -; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: ldrd r5, r0, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: mov r8, r10 ; CHECK-NEXT: .LBB3_5: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB3_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vldrb.s16 q0, [r4], #8 -; CHECK-NEXT: vadd.i16 q1, q0, r9 +; CHECK-NEXT: vldrb.s16 q0, [r5], #8 +; CHECK-NEXT: vadd.i16 q1, q0, r4 ; CHECK-NEXT: vldrb.s16 q0, [r7], #8 -; CHECK-NEXT: vmlava.s16 r12, q0, q1 -; CHECK-NEXT: vldrb.s16 q1, [r5], #8 -; CHECK-NEXT: vadd.i16 q1, q1, r9 -; CHECK-NEXT: vmlava.s16 r6, q0, q1 -; CHECK-NEXT: vldrb.s16 q1, [r3], #8 -; CHECK-NEXT: vadd.i16 q1, q1, r9 ; CHECK-NEXT: vmlava.s16 r10, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r3], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r4 +; CHECK-NEXT: vmlava.s16 r6, q0, q1 +; CHECK-NEXT: vldrb.s16 q1, [r0], #8 +; CHECK-NEXT: vadd.i16 q1, q1, r4 +; CHECK-NEXT: vmlava.s16 r12, q0, q1 ; CHECK-NEXT: vldrb.s16 q1, [r1], #8 -; CHECK-NEXT: vadd.i16 q1, q1, r9 +; CHECK-NEXT: vadd.i16 q1, q1, r4 ; CHECK-NEXT: vmlava.s16 r8, q0, q1 ; CHECK-NEXT: le lr, .LBB3_5 ; CHECK-NEXT: b .LBB3_7 ; CHECK-NEXT: .LBB3_6: @ in Loop: Header=BB3_3 Depth=1 -; CHECK-NEXT: mov r8, r12 -; CHECK-NEXT: mov r10, r12 -; CHECK-NEXT: mov r6, r12 +; CHECK-NEXT: mov r8, r10 +; CHECK-NEXT: mov r12, r10 +; CHECK-NEXT: mov r6, r10 ; CHECK-NEXT: .LBB3_7: @ %for.cond.cleanup23 ; CHECK-NEXT: @ in Loop: Header=BB3_3 Depth=1 -; CHECK-NEXT: ldr r3, [sp, #72] -; CHECK-NEXT: add.w r1, r10, r8 -; CHECK-NEXT: add r1, r6 -; CHECK-NEXT: add r1, r12 -; CHECK-NEXT: strb.w r1, [r3, r11] +; CHECK-NEXT: ldr r1, [sp, #72] +; CHECK-NEXT: add.w r0, r12, r8 +; CHECK-NEXT: add r0, r6 +; CHECK-NEXT: add r0, r10 +; CHECK-NEXT: strb.w r0, [r1, r11] ; CHECK-NEXT: add.w r11, r11, #1 ; CHECK-NEXT: cmp r11, r2 
; CHECK-NEXT: bne .LBB3_3 @@ -737,8 +736,8 @@ ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: mov r8, r12 ; CHECK-NEXT: mla r5, r11, r3, r0 -; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: ldrd r4, r7, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldrd r4, r0, [sp] @ 8-byte Folded Reload ; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: .LBB5_7: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB5_5 Depth=1 @@ -747,10 +746,10 @@ ; CHECK-NEXT: vadd.i16 q2, q1, q0 ; CHECK-NEXT: vldrb.s16 q1, [r5], #8 ; CHECK-NEXT: vmlava.s16 r12, q1, q2 -; CHECK-NEXT: vldrb.s16 q2, [r0], #8 +; CHECK-NEXT: vldrb.s16 q2, [r7], #8 ; CHECK-NEXT: vadd.i16 q2, q2, q0 ; CHECK-NEXT: vmlava.s16 r6, q1, q2 -; CHECK-NEXT: vldrb.s16 q2, [r7], #8 +; CHECK-NEXT: vldrb.s16 q2, [r0], #8 ; CHECK-NEXT: vadd.i16 q2, q2, q0 ; CHECK-NEXT: vmlava.s16 r8, q1, q2 ; CHECK-NEXT: vldrb.s16 q2, [r1], #8 @@ -908,8 +907,8 @@ ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: mov r8, r12 ; CHECK-NEXT: mla r5, r11, r3, r0 -; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: ldrd r4, r7, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldrd r4, r0, [sp] @ 8-byte Folded Reload ; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: .LBB6_5: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB6_3 Depth=1 @@ -918,10 +917,10 @@ ; CHECK-NEXT: vadd.i16 q2, q1, q0 ; CHECK-NEXT: vldrb.s16 q1, [r5], #8 ; CHECK-NEXT: vmlava.s16 r12, q1, q2 -; CHECK-NEXT: vldrb.s16 q2, [r0], #8 +; CHECK-NEXT: vldrb.s16 q2, [r7], #8 ; CHECK-NEXT: vadd.i16 q2, q2, q0 ; CHECK-NEXT: vmlava.s16 r6, q1, q2 -; CHECK-NEXT: vldrb.s16 q2, [r7], #8 +; CHECK-NEXT: vldrb.s16 q2, [r0], #8 ; CHECK-NEXT: vadd.i16 q2, q2, q0 ; CHECK-NEXT: vmlava.s16 r8, q1, q2 ; CHECK-NEXT: vldrb.s16 q2, [r1], #8 diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll --- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ 
b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -20,50 +20,50 @@ ; CHECK-NEXT: @ %bb.2: ; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: mov r12, r0 -; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: mov r11, r2 ; CHECK-NEXT: b .LBB0_6 ; CHECK-NEXT: .LBB0_3: @ %vector.ph ; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: bic r3, r3, #1 ; CHECK-NEXT: subs r7, r3, #2 -; CHECK-NEXT: adr r4, .LCPI0_0 ; CHECK-NEXT: movs r6, #1 +; CHECK-NEXT: adr r4, .LCPI0_0 ; CHECK-NEXT: add.w r11, r2, r3, lsl #2 ; CHECK-NEXT: add.w lr, r6, r7, lsr #1 -; CHECK-NEXT: add.w r9, r1, r3, lsl #2 +; CHECK-NEXT: add.w r6, r1, r3, lsl #2 ; CHECK-NEXT: add.w r12, r0, r3, lsl #2 ; CHECK-NEXT: vldrw.u32 q0, [r4] +; CHECK-NEXT: mvn r10, #-2147483648 ; CHECK-NEXT: vmvn.i32 q1, #0x80000000 -; CHECK-NEXT: mov.w r10, #-1 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: str r3, [sp] @ 4-byte Spill ; CHECK-NEXT: .LBB0_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrd r4, r5, [r0] +; CHECK-NEXT: ldrd r4, r8, [r0] ; CHECK-NEXT: adds r0, #8 -; CHECK-NEXT: ldrd r7, r6, [r1] +; CHECK-NEXT: ldrd r7, r5, [r1] ; CHECK-NEXT: adds r1, #8 -; CHECK-NEXT: smull r8, r5, r6, r5 +; CHECK-NEXT: smull r8, r5, r5, r8 ; CHECK-NEXT: smull r4, r7, r7, r4 ; CHECK-NEXT: asrl r8, r5, #31 ; CHECK-NEXT: asrl r4, r7, #31 -; CHECK-NEXT: rsbs.w r3, r4, #-2147483648 +; CHECK-NEXT: rsbs.w r9, r4, #-2147483648 ; CHECK-NEXT: vmov.32 q4[0], r4 -; CHECK-NEXT: sbcs.w r3, r10, r7 -; CHECK-NEXT: vmov.32 q4[1], r7 +; CHECK-NEXT: mov.w r9, #-1 +; CHECK-NEXT: sbcs.w r3, r9, r7 ; CHECK-NEXT: mov.w r3, #0 -; CHECK-NEXT: vmov.32 q4[2], r8 +; CHECK-NEXT: vmov.32 q4[1], r7 ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r3, #1 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: vmov.32 q4[3], r5 +; CHECK-NEXT: vmov.32 q4[2], r8 ; CHECK-NEXT: vmov.32 q2[0], r3 +; CHECK-NEXT: vmov.32 q4[3], r5 ; CHECK-NEXT: vmov.32 q2[1], r3 ; CHECK-NEXT: rsbs.w r3, r8, #-2147483648 -; CHECK-NEXT: sbcs.w r3, r10, r5 
-; CHECK-NEXT: mvn r5, #-2147483648 +; CHECK-NEXT: sbcs.w r3, r9, r5 ; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r3, #1 @@ -76,7 +76,7 @@ ; CHECK-NEXT: vorr q2, q2, q3 ; CHECK-NEXT: vmov r4, s8 ; CHECK-NEXT: vmov r3, s9 -; CHECK-NEXT: subs r4, r4, r5 +; CHECK-NEXT: subs.w r4, r4, r10 ; CHECK-NEXT: sbcs r3, r3, #0 ; CHECK-NEXT: vmov r4, s10 ; CHECK-NEXT: mov.w r3, #0 @@ -87,7 +87,7 @@ ; CHECK-NEXT: vmov.32 q3[0], r3 ; CHECK-NEXT: vmov.32 q3[1], r3 ; CHECK-NEXT: vmov r3, s11 -; CHECK-NEXT: subs r4, r4, r5 +; CHECK-NEXT: subs.w r4, r4, r10 ; CHECK-NEXT: sbcs r3, r3, #0 ; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: it lt @@ -116,7 +116,7 @@ ; CHECK-NEXT: .LBB0_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r3, [r12], #4 -; CHECK-NEXT: ldr r4, [r9], #4 +; CHECK-NEXT: ldr r4, [r6], #4 ; CHECK-NEXT: smull r4, r3, r4, r3 ; CHECK-NEXT: asrl r4, r3, #31 ; CHECK-NEXT: subs r5, r1, r4 @@ -908,36 +908,41 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32* nocapture readonly %pSrcB, i32* noalias nocapture %pDst, i32 %N) { ; CHECK-LABEL: usatmul_4_q31: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: beq.w .LBB4_8 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader -; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: mov.w r11, #0 ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB4_3 ; CHECK-NEXT: @ %bb.2: ; CHECK-NEXT: mov r12, r0 -; CHECK-NEXT: mov r9, r1 -; CHECK-NEXT: mov r10, r2 +; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: mov r9, r2 ; CHECK-NEXT: b .LBB4_6 ; CHECK-NEXT: .LBB4_3: @ %vector.ph -; CHECK-NEXT: bic r8, r3, #3 +; CHECK-NEXT: bic r11, r3, #3 ; 
CHECK-NEXT: movs r6, #1 -; CHECK-NEXT: sub.w r7, r8, #4 -; CHECK-NEXT: add.w r10, r2, r8, lsl #2 -; CHECK-NEXT: add.w r9, r1, r8, lsl #2 -; CHECK-NEXT: add.w lr, r6, r7, lsr #2 -; CHECK-NEXT: add.w r12, r0, r8, lsl #2 -; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: sub.w r7, r11, #4 +; CHECK-NEXT: add.w r9, r2, r11, lsl #2 +; CHECK-NEXT: add.w r8, r1, r11, lsl #2 +; CHECK-NEXT: add.w r7, r6, r7, lsr #2 +; CHECK-NEXT: add.w r12, r0, r11, lsl #2 +; CHECK-NEXT: mov r10, r7 ; CHECK-NEXT: .LBB4_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 +; CHECK-NEXT: mov lr, r10 ; CHECK-NEXT: vmov.f32 s4, s2 +; CHECK-NEXT: sub.w lr, lr, #1 ; CHECK-NEXT: vmov.f32 s12, s10 +; CHECK-NEXT: mov r10, lr ; CHECK-NEXT: vmov.f32 s6, s3 ; CHECK-NEXT: vmov.f32 s14, s11 ; CHECK-NEXT: vmullb.u32 q4, q3, q1 @@ -1002,28 +1007,31 @@ ; CHECK-NEXT: vmov.f32 s2, s4 ; CHECK-NEXT: vmov.f32 s3, s6 ; CHECK-NEXT: vstrb.8 q0, [r2], #16 -; CHECK-NEXT: le lr, .LBB4_4 -; CHECK-NEXT: @ %bb.5: @ %middle.block -; CHECK-NEXT: cmp r8, r3 +; CHECK-NEXT: cmp.w lr, #0 +; CHECK-NEXT: bne.w .LBB4_4 +; CHECK-NEXT: b .LBB4_5 +; CHECK-NEXT: .LBB4_5: @ %middle.block +; CHECK-NEXT: cmp r11, r3 ; CHECK-NEXT: beq .LBB4_8 ; CHECK-NEXT: .LBB4_6: @ %for.body.preheader21 -; CHECK-NEXT: sub.w lr, r3, r8 +; CHECK-NEXT: sub.w lr, r3, r11 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB4_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r0, [r12], #4 -; CHECK-NEXT: ldr r1, [r9], #4 +; CHECK-NEXT: ldr r1, [r8], #4 ; CHECK-NEXT: umull r0, r1, r1, r0 ; CHECK-NEXT: lsrl r0, r1, #31 ; CHECK-NEXT: subs.w r2, r0, #-1 ; CHECK-NEXT: sbcs r1, r1, #0 ; CHECK-NEXT: it hs ; CHECK-NEXT: movhs.w r0, #-1 -; CHECK-NEXT: str r0, [r10], #4 +; CHECK-NEXT: str r0, [r9], #4 ; CHECK-NEXT: le lr, .LBB4_7 ; CHECK-NEXT: .LBB4_8: @ %for.cond.cleanup ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} +; 
CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %cmp8 = icmp eq i32 %N, 0 br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader @@ -1136,8 +1144,7 @@ ; CHECK-NEXT: le lr, .LBB5_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB5_8 ; CHECK-NEXT: .LBB5_6: @ %for.body.preheader21 ; CHECK-NEXT: movw r0, #32768 ; CHECK-NEXT: sub.w lr, r3, r5 @@ -1277,8 +1284,7 @@ ; CHECK-NEXT: le lr, .LBB6_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB6_8 ; CHECK-NEXT: .LBB6_6: @ %for.body.preheader21 ; CHECK-NEXT: movw r0, #32768 ; CHECK-NEXT: sub.w lr, r3, r5 @@ -1415,8 +1421,7 @@ ; CHECK-NEXT: le lr, .LBB7_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB7_8 ; CHECK-NEXT: .LBB7_6: @ %for.body.preheader21 ; CHECK-NEXT: movw r0, #32768 ; CHECK-NEXT: sub.w lr, r3, r5 @@ -1959,8 +1964,7 @@ ; CHECK-NEXT: le lr, .LBB11_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB11_8 ; CHECK-NEXT: .LBB11_6: @ %for.body.preheader21 ; CHECK-NEXT: sub.w lr, r3, r5 ; CHECK-NEXT: movw r0, #65535 @@ -2093,8 +2097,7 @@ ; CHECK-NEXT: le lr, .LBB12_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB12_8 ; CHECK-NEXT: .LBB12_6: @ %for.body.preheader21 ; CHECK-NEXT: sub.w lr, r3, r5 ; CHECK-NEXT: movw r0, #65535 @@ -2229,8 +2232,7 @@ ; CHECK-NEXT: le lr, .LBB13_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB13_8 ; CHECK-NEXT: .LBB13_6: @ %for.body.preheader21 ; CHECK-NEXT: 
sub.w lr, r3, r5 ; CHECK-NEXT: dls lr, lr @@ -2364,8 +2366,7 @@ ; CHECK-NEXT: le lr, .LBB14_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB14_8 ; CHECK-NEXT: .LBB14_6: @ %for.body.preheader23 ; CHECK-NEXT: sub.w lr, r3, r5 ; CHECK-NEXT: mvn r0, #127 @@ -2504,8 +2505,7 @@ ; CHECK-NEXT: le lr, .LBB15_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB15_8 ; CHECK-NEXT: .LBB15_6: @ %for.body.preheader23 ; CHECK-NEXT: sub.w lr, r3, r5 ; CHECK-NEXT: mvn r0, #127 @@ -2641,8 +2641,7 @@ ; CHECK-NEXT: le lr, .LBB16_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB16_8 ; CHECK-NEXT: .LBB16_6: @ %for.body.preheader23 ; CHECK-NEXT: sub.w lr, r3, r5 ; CHECK-NEXT: mvn r0, #127 @@ -3422,8 +3421,7 @@ ; CHECK-NEXT: le lr, .LBB20_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB20_8 ; CHECK-NEXT: .LBB20_6: @ %for.body.preheader23 ; CHECK-NEXT: sub.w lr, r3, r5 ; CHECK-NEXT: dls lr, lr @@ -3557,8 +3555,7 @@ ; CHECK-NEXT: le lr, .LBB21_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: cmp r5, r3 -; CHECK-NEXT: it eq -; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: beq .LBB21_8 ; CHECK-NEXT: .LBB21_6: @ %for.body.preheader23 ; CHECK-NEXT: sub.w lr, r3, r5 ; CHECK-NEXT: dls lr, lr diff --git a/llvm/test/CodeGen/Thumb2/thumb2-branch.ll b/llvm/test/CodeGen/Thumb2/thumb2-branch.ll --- a/llvm/test/CodeGen/Thumb2/thumb2-branch.ll +++ b/llvm/test/CodeGen/Thumb2/thumb2-branch.ll @@ -8,7 +8,7 @@ define i32 @f1(i32 %a, i32 %b, i32* %v) { entry: ; CHECK-LABEL: f1: -; CHECK: bne LBB +; CHECK: beq LBB %tmp = icmp eq i32 %a, %b ; [#uses=1] br i1 %tmp, label %cond_true, label %return @@ -59,7 +59,7 @@ 
define i32 @f4(i32 %a, i32 %b, i32* %v) { entry: ; CHECK-LABEL: f4: -; CHECK: blo LBB +; CHECK: bhs LBB %tmp = icmp uge i32 %a, %b ; [#uses=1] br i1 %tmp, label %cond_true, label %return diff --git a/llvm/test/CodeGen/X86/3addr-16bit.ll b/llvm/test/CodeGen/X86/3addr-16bit.ll --- a/llvm/test/CodeGen/X86/3addr-16bit.ll +++ b/llvm/test/CodeGen/X86/3addr-16bit.ll @@ -12,8 +12,11 @@ ; X64-NEXT: movl %esi, %eax ; X64-NEXT: incl %eax ; X64-NEXT: cmpw %di, %si -; X64-NEXT: jne LBB0_2 -; X64-NEXT: ## %bb.1: ## %bb +; X64-NEXT: je LBB0_1 +; X64-NEXT: ## %bb.2: ## %bb1 +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: retq +; X64-NEXT: LBB0_1: ## %bb ; X64-NEXT: pushq %rbx ; X64-NEXT: movzwl %ax, %ebx ; X64-NEXT: movl %ebx, %edi @@ -21,9 +24,6 @@ ; X64-NEXT: movl %ebx, %eax ; X64-NEXT: popq %rbx ; X64-NEXT: retq -; X64-NEXT: LBB0_2: ## %bb1 -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: retq ; ; X32-LABEL: test1: ; X32: ## %bb.0: ## %entry @@ -33,15 +33,15 @@ ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: incl %eax ; X32-NEXT: cmpw {{[0-9]+}}(%esp), %cx -; X32-NEXT: jne LBB0_2 -; X32-NEXT: ## %bb.1: ## %bb +; X32-NEXT: je LBB0_1 +; X32-NEXT: ## %bb.2: ## %bb1 +; X32-NEXT: movzwl %ax, %eax +; X32-NEXT: jmp LBB0_3 +; X32-NEXT: LBB0_1: ## %bb ; X32-NEXT: movzwl %ax, %esi ; X32-NEXT: movl %esi, (%esp) ; X32-NEXT: calll _foo ; X32-NEXT: movl %esi, %eax -; X32-NEXT: jmp LBB0_3 -; X32-NEXT: LBB0_2: ## %bb1 -; X32-NEXT: movzwl %ax, %eax ; X32-NEXT: LBB0_3: ## %bb1 ; X32-NEXT: addl $8, %esp ; X32-NEXT: popl %esi @@ -65,8 +65,11 @@ ; X64-NEXT: movl %esi, %eax ; X64-NEXT: decl %eax ; X64-NEXT: cmpw %di, %si -; X64-NEXT: jne LBB1_2 -; X64-NEXT: ## %bb.1: ## %bb +; X64-NEXT: je LBB1_1 +; X64-NEXT: ## %bb.2: ## %bb1 +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: retq +; X64-NEXT: LBB1_1: ## %bb ; X64-NEXT: pushq %rbx ; X64-NEXT: movzwl %ax, %ebx ; X64-NEXT: movl %ebx, %edi @@ -74,9 +77,6 @@ ; X64-NEXT: movl %ebx, %eax ; X64-NEXT: popq %rbx ; X64-NEXT: retq -; X64-NEXT: LBB1_2: ## %bb1 -; X64-NEXT: movzwl %ax, 
%eax -; X64-NEXT: retq ; ; X32-LABEL: test2: ; X32: ## %bb.0: ## %entry @@ -86,15 +86,15 @@ ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: decl %eax ; X32-NEXT: cmpw {{[0-9]+}}(%esp), %cx -; X32-NEXT: jne LBB1_2 -; X32-NEXT: ## %bb.1: ## %bb +; X32-NEXT: je LBB1_1 +; X32-NEXT: ## %bb.2: ## %bb1 +; X32-NEXT: movzwl %ax, %eax +; X32-NEXT: jmp LBB1_3 +; X32-NEXT: LBB1_1: ## %bb ; X32-NEXT: movzwl %ax, %esi ; X32-NEXT: movl %esi, (%esp) ; X32-NEXT: calll _foo ; X32-NEXT: movl %esi, %eax -; X32-NEXT: jmp LBB1_3 -; X32-NEXT: LBB1_2: ## %bb1 -; X32-NEXT: movzwl %ax, %eax ; X32-NEXT: LBB1_3: ## %bb1 ; X32-NEXT: addl $8, %esp ; X32-NEXT: popl %esi @@ -120,8 +120,11 @@ ; X64-NEXT: movl %esi, %eax ; X64-NEXT: addl $2, %eax ; X64-NEXT: cmpw %di, %si -; X64-NEXT: jne LBB2_2 -; X64-NEXT: ## %bb.1: ## %bb +; X64-NEXT: je LBB2_1 +; X64-NEXT: ## %bb.2: ## %bb1 +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: retq +; X64-NEXT: LBB2_1: ## %bb ; X64-NEXT: pushq %rbx ; X64-NEXT: movzwl %ax, %ebx ; X64-NEXT: movl %ebx, %edi @@ -129,9 +132,6 @@ ; X64-NEXT: movl %ebx, %eax ; X64-NEXT: popq %rbx ; X64-NEXT: retq -; X64-NEXT: LBB2_2: ## %bb1 -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: retq ; ; X32-LABEL: test3: ; X32: ## %bb.0: ## %entry @@ -141,15 +141,15 @@ ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl $2, %eax ; X32-NEXT: cmpw {{[0-9]+}}(%esp), %cx -; X32-NEXT: jne LBB2_2 -; X32-NEXT: ## %bb.1: ## %bb +; X32-NEXT: je LBB2_1 +; X32-NEXT: ## %bb.2: ## %bb1 +; X32-NEXT: movzwl %ax, %eax +; X32-NEXT: jmp LBB2_3 +; X32-NEXT: LBB2_1: ## %bb ; X32-NEXT: movzwl %ax, %esi ; X32-NEXT: movl %esi, (%esp) ; X32-NEXT: calll _foo ; X32-NEXT: movl %esi, %eax -; X32-NEXT: jmp LBB2_3 -; X32-NEXT: LBB2_2: ## %bb1 -; X32-NEXT: movzwl %ax, %eax ; X32-NEXT: LBB2_3: ## %bb1 ; X32-NEXT: addl $8, %esp ; X32-NEXT: popl %esi @@ -173,8 +173,11 @@ ; X64-NEXT: movl %esi, %eax ; X64-NEXT: addl %edi, %eax ; X64-NEXT: cmpw %di, %si -; X64-NEXT: jne LBB3_2 -; X64-NEXT: ## %bb.1: ## %bb +; X64-NEXT: je LBB3_1 +; X64-NEXT: ## %bb.2: ## 
%bb1 +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: retq +; X64-NEXT: LBB3_1: ## %bb ; X64-NEXT: pushq %rbx ; X64-NEXT: movzwl %ax, %ebx ; X64-NEXT: movl %ebx, %edi @@ -182,9 +185,6 @@ ; X64-NEXT: movl %ebx, %eax ; X64-NEXT: popq %rbx ; X64-NEXT: retq -; X64-NEXT: LBB3_2: ## %bb1 -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: retq ; ; X32-LABEL: test4: ; X32: ## %bb.0: ## %entry @@ -195,15 +195,15 @@ ; X32-NEXT: movl %edx, %eax ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: cmpw %cx, %dx -; X32-NEXT: jne LBB3_2 -; X32-NEXT: ## %bb.1: ## %bb +; X32-NEXT: je LBB3_1 +; X32-NEXT: ## %bb.2: ## %bb1 +; X32-NEXT: movzwl %ax, %eax +; X32-NEXT: jmp LBB3_3 +; X32-NEXT: LBB3_1: ## %bb ; X32-NEXT: movzwl %ax, %esi ; X32-NEXT: movl %esi, (%esp) ; X32-NEXT: calll _foo ; X32-NEXT: movl %esi, %eax -; X32-NEXT: jmp LBB3_3 -; X32-NEXT: LBB3_2: ## %bb1 -; X32-NEXT: movzwl %ax, %eax ; X32-NEXT: LBB3_3: ## %bb1 ; X32-NEXT: addl $8, %esp ; X32-NEXT: popl %esi diff --git a/llvm/test/CodeGen/X86/absolute-cmp.ll b/llvm/test/CodeGen/X86/absolute-cmp.ll --- a/llvm/test/CodeGen/X86/absolute-cmp.ll +++ b/llvm/test/CodeGen/X86/absolute-cmp.ll @@ -17,12 +17,16 @@ ; NOPIC-NEXT: .cfi_def_cfa_offset 16 ; NOPIC-NEXT: cmpq $cmp8@ABS8, %rdi # encoding: [0x48,0x83,0xff,A] ; NOPIC-NEXT: # fixup A - offset: 3, value: cmp8@ABS8, kind: FK_Data_1 -; NOPIC-NEXT: ja .LBB0_2 # encoding: [0x77,A] -; NOPIC-NEXT: # fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1 -; NOPIC-NEXT: # %bb.1: # %t +; NOPIC-NEXT: jbe .LBB0_1 # encoding: [0x76,A] +; NOPIC-NEXT: # fixup A - offset: 1, value: .LBB0_1-1, kind: FK_PCRel_1 +; NOPIC-NEXT: # %bb.2: # %f +; NOPIC-NEXT: popq %rax # encoding: [0x58] +; NOPIC-NEXT: .cfi_def_cfa_offset 8 +; NOPIC-NEXT: retq # encoding: [0xc3] +; NOPIC-NEXT: .LBB0_1: # %t +; NOPIC-NEXT: .cfi_def_cfa_offset 16 ; NOPIC-NEXT: callq f # encoding: [0xe8,A,A,A,A] ; NOPIC-NEXT: # fixup A - offset: 1, value: f-4, kind: reloc_branch_4byte_pcrel -; NOPIC-NEXT: .LBB0_2: # %f ; NOPIC-NEXT: popq %rax # encoding: [0x58] ; 
NOPIC-NEXT: .cfi_def_cfa_offset 8 ; NOPIC-NEXT: retq # encoding: [0xc3] @@ -33,12 +37,16 @@ ; PIC-NEXT: .cfi_def_cfa_offset 16 ; PIC-NEXT: cmpq $cmp8@ABS8, %rdi # encoding: [0x48,0x83,0xff,A] ; PIC-NEXT: # fixup A - offset: 3, value: cmp8@ABS8, kind: FK_Data_1 -; PIC-NEXT: ja .LBB0_2 # encoding: [0x77,A] -; PIC-NEXT: # fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1 -; PIC-NEXT: # %bb.1: # %t +; PIC-NEXT: jbe .LBB0_1 # encoding: [0x76,A] +; PIC-NEXT: # fixup A - offset: 1, value: .LBB0_1-1, kind: FK_PCRel_1 +; PIC-NEXT: # %bb.2: # %f +; PIC-NEXT: popq %rax # encoding: [0x58] +; PIC-NEXT: .cfi_def_cfa_offset 8 +; PIC-NEXT: retq # encoding: [0xc3] +; PIC-NEXT: .LBB0_1: # %t +; PIC-NEXT: .cfi_def_cfa_offset 16 ; PIC-NEXT: callq f@PLT # encoding: [0xe8,A,A,A,A] ; PIC-NEXT: # fixup A - offset: 1, value: f@PLT-4, kind: FK_PCRel_4 -; PIC-NEXT: .LBB0_2: # %f ; PIC-NEXT: popq %rax # encoding: [0x58] ; PIC-NEXT: .cfi_def_cfa_offset 8 ; PIC-NEXT: retq # encoding: [0xc3] @@ -60,12 +68,16 @@ ; NOPIC-NEXT: .cfi_def_cfa_offset 16 ; NOPIC-NEXT: cmpq $cmp32, %rdi # encoding: [0x48,0x81,0xff,A,A,A,A] ; NOPIC-NEXT: # fixup A - offset: 3, value: cmp32, kind: reloc_signed_4byte -; NOPIC-NEXT: ja .LBB1_2 # encoding: [0x77,A] -; NOPIC-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 -; NOPIC-NEXT: # %bb.1: # %t +; NOPIC-NEXT: jbe .LBB1_1 # encoding: [0x76,A] +; NOPIC-NEXT: # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1 +; NOPIC-NEXT: # %bb.2: # %f +; NOPIC-NEXT: popq %rax # encoding: [0x58] +; NOPIC-NEXT: .cfi_def_cfa_offset 8 +; NOPIC-NEXT: retq # encoding: [0xc3] +; NOPIC-NEXT: .LBB1_1: # %t +; NOPIC-NEXT: .cfi_def_cfa_offset 16 ; NOPIC-NEXT: callq f # encoding: [0xe8,A,A,A,A] ; NOPIC-NEXT: # fixup A - offset: 1, value: f-4, kind: reloc_branch_4byte_pcrel -; NOPIC-NEXT: .LBB1_2: # %f ; NOPIC-NEXT: popq %rax # encoding: [0x58] ; NOPIC-NEXT: .cfi_def_cfa_offset 8 ; NOPIC-NEXT: retq # encoding: [0xc3] @@ -76,12 +88,16 @@ ; PIC-NEXT: .cfi_def_cfa_offset 16 
; PIC-NEXT: cmpq $cmp32, %rdi # encoding: [0x48,0x81,0xff,A,A,A,A] ; PIC-NEXT: # fixup A - offset: 3, value: cmp32, kind: reloc_signed_4byte -; PIC-NEXT: ja .LBB1_2 # encoding: [0x77,A] -; PIC-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 -; PIC-NEXT: # %bb.1: # %t +; PIC-NEXT: jbe .LBB1_1 # encoding: [0x76,A] +; PIC-NEXT: # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1 +; PIC-NEXT: # %bb.2: # %f +; PIC-NEXT: popq %rax # encoding: [0x58] +; PIC-NEXT: .cfi_def_cfa_offset 8 +; PIC-NEXT: retq # encoding: [0xc3] +; PIC-NEXT: .LBB1_1: # %t +; PIC-NEXT: .cfi_def_cfa_offset 16 ; PIC-NEXT: callq f@PLT # encoding: [0xe8,A,A,A,A] ; PIC-NEXT: # fixup A - offset: 1, value: f@PLT-4, kind: FK_PCRel_4 -; PIC-NEXT: .LBB1_2: # %f ; PIC-NEXT: popq %rax # encoding: [0x58] ; PIC-NEXT: .cfi_def_cfa_offset 8 ; PIC-NEXT: retq # encoding: [0xc3] diff --git a/llvm/test/CodeGen/X86/atomic-flags.ll b/llvm/test/CodeGen/X86/atomic-flags.ll --- a/llvm/test/CodeGen/X86/atomic-flags.ll +++ b/llvm/test/CodeGen/X86/atomic-flags.ll @@ -1,20 +1,55 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X86-64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X86-32 ; Make sure that flags are properly preserved despite atomic optimizations. define i32 @atomic_and_flags_1(i8* %p, i32 %a, i32 %b) { -; CHECK-LABEL: atomic_and_flags_1: - ; Generate flags value, and use it. 
- ; CHECK: cmpl - ; CHECK-NEXT: jne +; X86-64-LABEL: atomic_and_flags_1: +; X86-64: # %bb.0: +; X86-64-NEXT: cmpl %edx, %esi +; X86-64-NEXT: je .LBB0_1 +; X86-64-NEXT: # %bb.3: # %L2 +; X86-64-NEXT: movl $2, %eax +; X86-64-NEXT: retq +; X86-64-NEXT: .LBB0_1: # %L1 +; X86-64-NEXT: incb (%rdi) +; X86-64-NEXT: cmpl %edx, %esi +; X86-64-NEXT: je .LBB0_4 +; X86-64-NEXT: # %bb.2: # %L4 +; X86-64-NEXT: movl $4, %eax +; X86-64-NEXT: retq +; X86-64-NEXT: .LBB0_4: # %L3 +; X86-64-NEXT: movl $3, %eax +; X86-64-NEXT: retq +; +; X86-32-LABEL: atomic_and_flags_1: +; X86-32: # %bb.0: +; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-32-NEXT: cmpl %eax, %ecx +; X86-32-NEXT: je .LBB0_1 +; X86-32-NEXT: # %bb.3: # %L2 +; X86-32-NEXT: movl $2, %eax +; X86-32-NEXT: retl +; X86-32-NEXT: .LBB0_1: # %L1 +; X86-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-32-NEXT: incb (%edx) +; X86-32-NEXT: cmpl %eax, %ecx +; X86-32-NEXT: je .LBB0_4 +; X86-32-NEXT: # %bb.2: # %L4 +; X86-32-NEXT: movl $4, %eax +; X86-32-NEXT: retl +; X86-32-NEXT: .LBB0_4: # %L3 +; X86-32-NEXT: movl $3, %eax +; X86-32-NEXT: retl %cmp = icmp eq i32 %a, %b br i1 %cmp, label %L1, label %L2 L1: ; The following pattern will get folded. - ; CHECK: incb + %1 = load atomic i8, i8* %p seq_cst, align 1 %2 = add i8 %1, 1 ; This forces the INC instruction to be generated. store atomic i8 %2, i8* %p release, align 1 @@ -23,8 +58,7 @@ ; somehow. This test checks that cmpl gets emitted again, but any ; rematerialization would work (the optimizer used to clobber the flags with ; the add). - ; CHECK-NEXT: cmpl - ; CHECK-NEXT: jne + br i1 %cmp, label %L3, label %L4 L2: @@ -39,18 +73,51 @@ ; Same as above, but using 2 as immediate to avoid the INC instruction. 
define i32 @atomic_and_flags_2(i8* %p, i32 %a, i32 %b) { -; CHECK-LABEL: atomic_and_flags_2: - ; CHECK: cmpl - ; CHECK-NEXT: jne +; X86-64-LABEL: atomic_and_flags_2: +; X86-64: # %bb.0: +; X86-64-NEXT: cmpl %edx, %esi +; X86-64-NEXT: je .LBB1_1 +; X86-64-NEXT: # %bb.3: # %L2 +; X86-64-NEXT: movl $2, %eax +; X86-64-NEXT: retq +; X86-64-NEXT: .LBB1_1: # %L1 +; X86-64-NEXT: addb $2, (%rdi) +; X86-64-NEXT: cmpl %edx, %esi +; X86-64-NEXT: je .LBB1_4 +; X86-64-NEXT: # %bb.2: # %L4 +; X86-64-NEXT: movl $4, %eax +; X86-64-NEXT: retq +; X86-64-NEXT: .LBB1_4: # %L3 +; X86-64-NEXT: movl $3, %eax +; X86-64-NEXT: retq +; +; X86-32-LABEL: atomic_and_flags_2: +; X86-32: # %bb.0: +; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-32-NEXT: cmpl %eax, %ecx +; X86-32-NEXT: je .LBB1_1 +; X86-32-NEXT: # %bb.3: # %L2 +; X86-32-NEXT: movl $2, %eax +; X86-32-NEXT: retl +; X86-32-NEXT: .LBB1_1: # %L1 +; X86-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-32-NEXT: addb $2, (%edx) +; X86-32-NEXT: cmpl %eax, %ecx +; X86-32-NEXT: je .LBB1_4 +; X86-32-NEXT: # %bb.2: # %L4 +; X86-32-NEXT: movl $4, %eax +; X86-32-NEXT: retl +; X86-32-NEXT: .LBB1_4: # %L3 +; X86-32-NEXT: movl $3, %eax +; X86-32-NEXT: retl %cmp = icmp eq i32 %a, %b br i1 %cmp, label %L1, label %L2 L1: - ; CHECK: addb %1 = load atomic i8, i8* %p seq_cst, align 1 %2 = add i8 %1, 2 store atomic i8 %2, i8* %p release, align 1 - ; CHECK-NEXT: cmpl - ; CHECK-NEXT: jne + br i1 %cmp, label %L3, label %L4 L2: ret i32 2 diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll --- a/llvm/test/CodeGen/X86/atomic-unordered.ll +++ b/llvm/test/CodeGen/X86/atomic-unordered.ll @@ -2716,24 +2716,24 @@ ; CHECK-O3-CUR-NEXT: movl (%rdi), %eax ; CHECK-O3-CUR-NEXT: mfence ; CHECK-O3-CUR-NEXT: cmpl %eax, %esi -; CHECK-O3-CUR-NEXT: jne .LBB116_2 -; CHECK-O3-CUR-NEXT: # %bb.1: # %taken -; CHECK-O3-CUR-NEXT: movb $1, %al -; CHECK-O3-CUR-NEXT: retq -; CHECK-O3-CUR-NEXT: 
.LBB116_2: # %untaken +; CHECK-O3-CUR-NEXT: je .LBB116_1 +; CHECK-O3-CUR-NEXT: # %bb.2: # %untaken ; CHECK-O3-CUR-NEXT: xorl %eax, %eax ; CHECK-O3-CUR-NEXT: retq +; CHECK-O3-CUR-NEXT: .LBB116_1: # %taken +; CHECK-O3-CUR-NEXT: movb $1, %al +; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: fold_cmp_over_fence: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: cmpl (%rdi), %esi ; CHECK-O3-EX-NEXT: mfence -; CHECK-O3-EX-NEXT: jne .LBB116_2 -; CHECK-O3-EX-NEXT: # %bb.1: # %taken -; CHECK-O3-EX-NEXT: movb $1, %al -; CHECK-O3-EX-NEXT: retq -; CHECK-O3-EX-NEXT: .LBB116_2: # %untaken +; CHECK-O3-EX-NEXT: je .LBB116_1 +; CHECK-O3-EX-NEXT: # %bb.2: # %untaken ; CHECK-O3-EX-NEXT: xorl %eax, %eax +; CHECK-O3-EX-NEXT: retq +; CHECK-O3-EX-NEXT: .LBB116_1: # %taken +; CHECK-O3-EX-NEXT: movb $1, %al ; CHECK-O3-EX-NEXT: retq %v2 = load atomic i32, i32* %p unordered, align 4 fence seq_cst diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll --- a/llvm/test/CodeGen/X86/bmi.ll +++ b/llvm/test/CodeGen/X86/bmi.ll @@ -1249,20 +1249,20 @@ ; X86-LABEL: pr42118_i32: ; X86: # %bb.0: ; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax -; X86-NEXT: jne .LBB50_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: jmp bar # TAILCALL -; X86-NEXT: .LBB50_1: +; X86-NEXT: je .LBB50_2 +; X86-NEXT: # %bb.1: ; X86-NEXT: retl +; X86-NEXT: .LBB50_2: +; X86-NEXT: jmp bar # TAILCALL ; ; X64-LABEL: pr42118_i32: ; X64: # %bb.0: ; X64-NEXT: blsrl %edi, %eax -; X64-NEXT: jne .LBB50_1 -; X64-NEXT: # %bb.2: -; X64-NEXT: jmp bar # TAILCALL -; X64-NEXT: .LBB50_1: +; X64-NEXT: je .LBB50_2 +; X64-NEXT: # %bb.1: ; X64-NEXT: retq +; X64-NEXT: .LBB50_2: +; X64-NEXT: jmp bar # TAILCALL %tmp = sub i32 0, %x %tmp1 = and i32 %tmp, %x %cmp = icmp eq i32 %tmp1, %x @@ -1289,25 +1289,25 @@ ; X86-NEXT: andl %eax, %edx ; X86-NEXT: andl %ecx, %esi ; X86-NEXT: orl %edx, %esi -; X86-NEXT: jne .LBB51_1 -; X86-NEXT: # %bb.2: +; X86-NEXT: je .LBB51_2 +; X86-NEXT: # %bb.1: ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 -; X86-NEXT: jmp bar # 
TAILCALL -; X86-NEXT: .LBB51_1: +; X86-NEXT: retl +; X86-NEXT: .LBB51_2: ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 -; X86-NEXT: retl +; X86-NEXT: jmp bar # TAILCALL ; ; X64-LABEL: pr42118_i64: ; X64: # %bb.0: ; X64-NEXT: blsrq %rdi, %rax -; X64-NEXT: jne .LBB51_1 -; X64-NEXT: # %bb.2: -; X64-NEXT: jmp bar # TAILCALL -; X64-NEXT: .LBB51_1: +; X64-NEXT: je .LBB51_2 +; X64-NEXT: # %bb.1: ; X64-NEXT: retq +; X64-NEXT: .LBB51_2: +; X64-NEXT: jmp bar # TAILCALL %tmp = sub i64 0, %x %tmp1 = and i64 %tmp, %x %cmp = icmp eq i64 %tmp1, %x diff --git a/llvm/test/CodeGen/X86/bt.ll b/llvm/test/CodeGen/X86/bt.ll --- a/llvm/test/CodeGen/X86/bt.ll +++ b/llvm/test/CodeGen/X86/bt.ll @@ -611,21 +611,23 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: btl %ecx, %eax -; X86-NEXT: jae .LBB16_2 -; X86-NEXT: # %bb.1: # %bb +; X86-NEXT: jb .LBB16_1 +; X86-NEXT: # %bb.2: # %UnifiedReturnBlock +; X86-NEXT: retl +; X86-NEXT: .LBB16_1: # %bb ; X86-NEXT: calll foo -; X86-NEXT: .LBB16_2: # %UnifiedReturnBlock ; X86-NEXT: retl ; ; X64-LABEL: query3: ; X64: # %bb.0: # %entry ; X64-NEXT: btl %esi, %edi -; X64-NEXT: jae .LBB16_2 -; X64-NEXT: # %bb.1: # %bb +; X64-NEXT: jb .LBB16_1 +; X64-NEXT: # %bb.2: # %UnifiedReturnBlock +; X64-NEXT: retq +; X64-NEXT: .LBB16_1: # %bb ; X64-NEXT: pushq %rax ; X64-NEXT: callq foo ; X64-NEXT: popq %rax -; X64-NEXT: .LBB16_2: # %UnifiedReturnBlock ; X64-NEXT: retq entry: %tmp29 = shl i32 1, %n @@ -647,21 +649,23 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: btl %ecx, %eax -; X86-NEXT: jae .LBB17_2 -; X86-NEXT: # %bb.1: # %bb +; X86-NEXT: jb .LBB17_1 +; X86-NEXT: # %bb.2: # %UnifiedReturnBlock +; X86-NEXT: retl +; X86-NEXT: .LBB17_1: # %bb ; X86-NEXT: calll foo -; X86-NEXT: .LBB17_2: # %UnifiedReturnBlock ; X86-NEXT: retl ; ; X64-LABEL: query3b: ; X64: # %bb.0: # %entry ; X64-NEXT: btl %esi, %edi -; X64-NEXT: jae .LBB17_2 -; 
X64-NEXT: # %bb.1: # %bb +; X64-NEXT: jb .LBB17_1 +; X64-NEXT: # %bb.2: # %UnifiedReturnBlock +; X64-NEXT: retq +; X64-NEXT: .LBB17_1: # %bb ; X64-NEXT: pushq %rax ; X64-NEXT: callq foo ; X64-NEXT: popq %rax -; X64-NEXT: .LBB17_2: # %UnifiedReturnBlock ; X64-NEXT: retq entry: %tmp29 = shl i32 1, %n @@ -683,21 +687,23 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: btl %ecx, %eax -; X86-NEXT: jae .LBB18_2 -; X86-NEXT: # %bb.1: # %bb +; X86-NEXT: jb .LBB18_1 +; X86-NEXT: # %bb.2: # %UnifiedReturnBlock +; X86-NEXT: retl +; X86-NEXT: .LBB18_1: # %bb ; X86-NEXT: calll foo -; X86-NEXT: .LBB18_2: # %UnifiedReturnBlock ; X86-NEXT: retl ; ; X64-LABEL: query3x: ; X64: # %bb.0: # %entry ; X64-NEXT: btl %esi, %edi -; X64-NEXT: jae .LBB18_2 -; X64-NEXT: # %bb.1: # %bb +; X64-NEXT: jb .LBB18_1 +; X64-NEXT: # %bb.2: # %UnifiedReturnBlock +; X64-NEXT: retq +; X64-NEXT: .LBB18_1: # %bb ; X64-NEXT: pushq %rax ; X64-NEXT: callq foo ; X64-NEXT: popq %rax -; X64-NEXT: .LBB18_2: # %UnifiedReturnBlock ; X64-NEXT: retq entry: %tmp29 = shl i32 1, %n @@ -719,21 +725,23 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: btl %ecx, %eax -; X86-NEXT: jae .LBB19_2 -; X86-NEXT: # %bb.1: # %bb +; X86-NEXT: jb .LBB19_1 +; X86-NEXT: # %bb.2: # %UnifiedReturnBlock +; X86-NEXT: retl +; X86-NEXT: .LBB19_1: # %bb ; X86-NEXT: calll foo -; X86-NEXT: .LBB19_2: # %UnifiedReturnBlock ; X86-NEXT: retl ; ; X64-LABEL: query3bx: ; X64: # %bb.0: # %entry ; X64-NEXT: btl %esi, %edi -; X64-NEXT: jae .LBB19_2 -; X64-NEXT: # %bb.1: # %bb +; X64-NEXT: jb .LBB19_1 +; X64-NEXT: # %bb.2: # %UnifiedReturnBlock +; X64-NEXT: retq +; X64-NEXT: .LBB19_1: # %bb ; X64-NEXT: pushq %rax ; X64-NEXT: callq foo ; X64-NEXT: popq %rax -; X64-NEXT: .LBB19_2: # %UnifiedReturnBlock ; X64-NEXT: retq entry: %tmp29 = shl i32 1, %n diff --git a/llvm/test/CodeGen/X86/conditional-tailcall-pgso.ll b/llvm/test/CodeGen/X86/conditional-tailcall-pgso.ll --- 
a/llvm/test/CodeGen/X86/conditional-tailcall-pgso.ll +++ b/llvm/test/CodeGen/X86/conditional-tailcall-pgso.ll @@ -63,21 +63,21 @@ ; CHECK32-NEXT: #APP ; CHECK32-NEXT: #NO_APP ; CHECK32-NEXT: cmpl {{[0-9]+}}(%esp), %eax # encoding: [0x3b,0x44,0x24,0x0c] -; CHECK32-NEXT: jne .LBB1_2 # encoding: [0x75,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 -; CHECK32-NEXT: # %bb.1: # %bb1 +; CHECK32-NEXT: je .LBB1_1 # encoding: [0x74,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # %bb.2: # %bb2 ; CHECK32-NEXT: popl %ebx # encoding: [0x5b] ; CHECK32-NEXT: .cfi_def_cfa_offset 4 -; CHECK32-NEXT: jmp foo # TAILCALL +; CHECK32-NEXT: jmp bar # TAILCALL ; CHECK32-NEXT: # encoding: [0xeb,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 -; CHECK32-NEXT: .LBB1_2: # %bb2 +; CHECK32-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; CHECK32-NEXT: .LBB1_1: # %bb1 ; CHECK32-NEXT: .cfi_def_cfa_offset 8 ; CHECK32-NEXT: popl %ebx # encoding: [0x5b] ; CHECK32-NEXT: .cfi_def_cfa_offset 4 -; CHECK32-NEXT: jmp bar # TAILCALL +; CHECK32-NEXT: jmp foo # TAILCALL ; CHECK32-NEXT: # encoding: [0xeb,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 ; ; CHECK64-LABEL: f_non_leaf: ; CHECK64: # %bb.0: # %entry @@ -87,21 +87,21 @@ ; CHECK64-NEXT: #APP ; CHECK64-NEXT: #NO_APP ; CHECK64-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7] -; CHECK64-NEXT: jne .LBB1_2 # encoding: [0x75,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 -; CHECK64-NEXT: # %bb.1: # %bb1 +; CHECK64-NEXT: je .LBB1_1 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.2: # %bb2 ; CHECK64-NEXT: popq %rbx # encoding: [0x5b] ; CHECK64-NEXT: .cfi_def_cfa_offset 8 -; CHECK64-NEXT: jmp foo # TAILCALL +; CHECK64-NEXT: jmp bar # TAILCALL ; 
CHECK64-NEXT: # encoding: [0xeb,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 -; CHECK64-NEXT: .LBB1_2: # %bb2 +; CHECK64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; CHECK64-NEXT: .LBB1_1: # %bb1 ; CHECK64-NEXT: .cfi_def_cfa_offset 16 ; CHECK64-NEXT: popq %rbx # encoding: [0x5b] ; CHECK64-NEXT: .cfi_def_cfa_offset 8 -; CHECK64-NEXT: jmp bar # TAILCALL +; CHECK64-NEXT: jmp foo # TAILCALL ; CHECK64-NEXT: # encoding: [0xeb,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 ; ; WIN64-LABEL: f_non_leaf: ; WIN64: # %bb.0: # %entry @@ -111,19 +111,19 @@ ; WIN64-NEXT: #APP ; WIN64-NEXT: #NO_APP ; WIN64-NEXT: cmpl %edx, %ecx # encoding: [0x39,0xd1] -; WIN64-NEXT: jne .LBB1_2 # encoding: [0x75,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 -; WIN64-NEXT: # %bb.1: # %bb1 +; WIN64-NEXT: je .LBB1_1 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.2: # %bb2 ; WIN64-NEXT: popq %rbx # encoding: [0x5b] -; WIN64-NEXT: jmp foo # TAILCALL +; WIN64-NEXT: jmp bar # TAILCALL ; WIN64-NEXT: # encoding: [0xeb,A] -; WIN64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 -; WIN64-NEXT: .LBB1_2: # %bb2 +; WIN64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; WIN64-NEXT: .LBB1_1: # %bb1 ; WIN64-NEXT: nop # encoding: [0x90] ; WIN64-NEXT: popq %rbx # encoding: [0x5b] -; WIN64-NEXT: jmp bar # TAILCALL +; WIN64-NEXT: jmp foo # TAILCALL ; WIN64-NEXT: # encoding: [0xeb,A] -; WIN64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; WIN64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 ; WIN64-NEXT: .seh_handlerdata ; WIN64-NEXT: .text ; WIN64-NEXT: .seh_endproc diff --git a/llvm/test/CodeGen/X86/conditional-tailcall.ll b/llvm/test/CodeGen/X86/conditional-tailcall.ll --- a/llvm/test/CodeGen/X86/conditional-tailcall.ll +++ 
b/llvm/test/CodeGen/X86/conditional-tailcall.ll @@ -63,21 +63,21 @@ ; CHECK32-NEXT: #APP ; CHECK32-NEXT: #NO_APP ; CHECK32-NEXT: cmpl {{[0-9]+}}(%esp), %eax # encoding: [0x3b,0x44,0x24,0x0c] -; CHECK32-NEXT: jne .LBB1_2 # encoding: [0x75,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 -; CHECK32-NEXT: # %bb.1: # %bb1 +; CHECK32-NEXT: je .LBB1_1 # encoding: [0x74,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # %bb.2: # %bb2 ; CHECK32-NEXT: popl %ebx # encoding: [0x5b] ; CHECK32-NEXT: .cfi_def_cfa_offset 4 -; CHECK32-NEXT: jmp foo # TAILCALL +; CHECK32-NEXT: jmp bar # TAILCALL ; CHECK32-NEXT: # encoding: [0xeb,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 -; CHECK32-NEXT: .LBB1_2: # %bb2 +; CHECK32-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; CHECK32-NEXT: .LBB1_1: # %bb1 ; CHECK32-NEXT: .cfi_def_cfa_offset 8 ; CHECK32-NEXT: popl %ebx # encoding: [0x5b] ; CHECK32-NEXT: .cfi_def_cfa_offset 4 -; CHECK32-NEXT: jmp bar # TAILCALL +; CHECK32-NEXT: jmp foo # TAILCALL ; CHECK32-NEXT: # encoding: [0xeb,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 ; ; CHECK64-LABEL: f_non_leaf: ; CHECK64: # %bb.0: # %entry @@ -87,21 +87,21 @@ ; CHECK64-NEXT: #APP ; CHECK64-NEXT: #NO_APP ; CHECK64-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7] -; CHECK64-NEXT: jne .LBB1_2 # encoding: [0x75,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 -; CHECK64-NEXT: # %bb.1: # %bb1 +; CHECK64-NEXT: je .LBB1_1 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.2: # %bb2 ; CHECK64-NEXT: popq %rbx # encoding: [0x5b] ; CHECK64-NEXT: .cfi_def_cfa_offset 8 -; CHECK64-NEXT: jmp foo # TAILCALL +; CHECK64-NEXT: jmp bar # TAILCALL ; CHECK64-NEXT: # encoding: [0xeb,A] -; CHECK64-NEXT: # fixup A - 
offset: 1, value: foo-1, kind: FK_PCRel_1 -; CHECK64-NEXT: .LBB1_2: # %bb2 +; CHECK64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; CHECK64-NEXT: .LBB1_1: # %bb1 ; CHECK64-NEXT: .cfi_def_cfa_offset 16 ; CHECK64-NEXT: popq %rbx # encoding: [0x5b] ; CHECK64-NEXT: .cfi_def_cfa_offset 8 -; CHECK64-NEXT: jmp bar # TAILCALL +; CHECK64-NEXT: jmp foo # TAILCALL ; CHECK64-NEXT: # encoding: [0xeb,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 ; ; WIN64-LABEL: f_non_leaf: ; WIN64: # %bb.0: # %entry @@ -111,19 +111,19 @@ ; WIN64-NEXT: #APP ; WIN64-NEXT: #NO_APP ; WIN64-NEXT: cmpl %edx, %ecx # encoding: [0x39,0xd1] -; WIN64-NEXT: jne .LBB1_2 # encoding: [0x75,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 -; WIN64-NEXT: # %bb.1: # %bb1 +; WIN64-NEXT: je .LBB1_1 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.2: # %bb2 ; WIN64-NEXT: popq %rbx # encoding: [0x5b] -; WIN64-NEXT: jmp foo # TAILCALL +; WIN64-NEXT: jmp bar # TAILCALL ; WIN64-NEXT: # encoding: [0xeb,A] -; WIN64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 -; WIN64-NEXT: .LBB1_2: # %bb2 +; WIN64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; WIN64-NEXT: .LBB1_1: # %bb1 ; WIN64-NEXT: nop # encoding: [0x90] ; WIN64-NEXT: popq %rbx # encoding: [0x5b] -; WIN64-NEXT: jmp bar # TAILCALL +; WIN64-NEXT: jmp foo # TAILCALL ; WIN64-NEXT: # encoding: [0xeb,A] -; WIN64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; WIN64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 ; WIN64-NEXT: .seh_handlerdata ; WIN64-NEXT: .text ; WIN64-NEXT: .seh_endproc diff --git a/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll b/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll --- a/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll +++ b/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll @@ -1,11 
+1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s ; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-apple-darwin10 | FileCheck %s define i32 @fcmp_oeq(float %x, float %y) { -; CHECK-LABEL: fcmp_oeq -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: jne {{LBB.+_1}} -; CHECK-NEXT: jp {{LBB.+_1}} +; CHECK-LABEL: fcmp_oeq: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jne LBB0_1 +; CHECK-NEXT: jp LBB0_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB0_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp oeq float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -15,9 +23,16 @@ } define i32 @fcmp_ogt(float %x, float %y) { -; CHECK-LABEL: fcmp_ogt -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: jbe {{LBB.+_1}} +; CHECK-LABEL: fcmp_ogt: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jbe LBB1_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB1_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp ogt float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -27,9 +42,16 @@ } define i32 @fcmp_oge(float %x, float %y) { -; CHECK-LABEL: fcmp_oge -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: jb {{LBB.+_1}} +; CHECK-LABEL: fcmp_oge: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jb LBB2_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB2_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp oge float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -39,9 +61,16 @@ } define i32 @fcmp_olt(float %x, float %y) { -; CHECK-LABEL: fcmp_olt -; CHECK: ucomiss %xmm0, %xmm1 -; CHECK-NEXT: jbe {{LBB.+_1}} +; CHECK-LABEL: fcmp_olt: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jbe LBB3_1 +; 
CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB3_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp olt float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -51,9 +80,16 @@ } define i32 @fcmp_ole(float %x, float %y) { -; CHECK-LABEL: fcmp_ole -; CHECK: ucomiss %xmm0, %xmm1 -; CHECK-NEXT: jb {{LBB.+_1}} +; CHECK-LABEL: fcmp_ole: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jb LBB4_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB4_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp ole float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -63,9 +99,16 @@ } define i32 @fcmp_one(float %x, float %y) { -; CHECK-LABEL: fcmp_one -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: je {{LBB.+_1}} +; CHECK-LABEL: fcmp_one: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: je LBB5_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB5_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp one float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -75,9 +118,16 @@ } define i32 @fcmp_ord(float %x, float %y) { -; CHECK-LABEL: fcmp_ord -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: jp {{LBB.+_1}} +; CHECK-LABEL: fcmp_ord: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jp LBB6_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB6_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp ord float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -87,9 +137,16 @@ } define i32 @fcmp_uno(float %x, float %y) { -; CHECK-LABEL: fcmp_uno -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: jp {{LBB.+_2}} +; CHECK-LABEL: fcmp_uno: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jp LBB7_2 +; CHECK-NEXT: ## %bb.1: ## %bb2 +; CHECK-NEXT: movl $1, 
%eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB7_2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq %1 = fcmp uno float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -99,9 +156,16 @@ } define i32 @fcmp_ueq(float %x, float %y) { -; CHECK-LABEL: fcmp_ueq -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: je {{LBB.+_2}} +; CHECK-LABEL: fcmp_ueq: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: je LBB8_2 +; CHECK-NEXT: ## %bb.1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB8_2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq %1 = fcmp ueq float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -111,9 +175,16 @@ } define i32 @fcmp_ugt(float %x, float %y) { -; CHECK-LABEL: fcmp_ugt -; CHECK: ucomiss %xmm0, %xmm1 -; CHECK-NEXT: jae {{LBB.+_1}} +; CHECK-LABEL: fcmp_ugt: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: jae LBB9_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB9_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp ugt float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -123,9 +194,16 @@ } define i32 @fcmp_uge(float %x, float %y) { -; CHECK-LABEL: fcmp_uge -; CHECK: ucomiss %xmm0, %xmm1 -; CHECK-NEXT: ja {{LBB.+_1}} +; CHECK-LABEL: fcmp_uge: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: ja LBB10_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB10_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp uge float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -135,9 +213,16 @@ } define i32 @fcmp_ult(float %x, float %y) { -; CHECK-LABEL: fcmp_ult -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: jae {{LBB.+_1}} +; CHECK-LABEL: fcmp_ult: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jae LBB11_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: 
LBB11_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp ult float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -147,9 +232,16 @@ } define i32 @fcmp_ule(float %x, float %y) { -; CHECK-LABEL: fcmp_ule -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: ja {{LBB.+_1}} +; CHECK-LABEL: fcmp_ule: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: ja LBB12_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB12_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp ule float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -159,10 +251,17 @@ } define i32 @fcmp_une(float %x, float %y) { -; CHECK-LABEL: fcmp_une -; CHECK: ucomiss %xmm1, %xmm0 -; CHECK-NEXT: jne {{LBB.+_2}} -; CHECK-NEXT: jnp {{LBB.+_1}} +; CHECK-LABEL: fcmp_une: +; CHECK: ## %bb.0: +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jne LBB13_2 +; CHECK-NEXT: jnp LBB13_1 +; CHECK-NEXT: LBB13_2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB13_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = fcmp une float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -172,9 +271,16 @@ } define i32 @icmp_eq(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_eq -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: jne {{LBB.+_1}} +; CHECK-LABEL: icmp_eq: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: je LBB14_2 +; CHECK-NEXT: ## %bb.1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB14_2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq %1 = icmp eq i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -184,9 +290,16 @@ } define i32 @icmp_ne(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_ne -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: je {{LBB.+_1}} +; CHECK-LABEL: icmp_ne: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: je LBB15_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB15_1: ## 
%bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = icmp ne i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -196,9 +309,16 @@ } define i32 @icmp_ugt(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_ugt -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: jbe {{LBB.+_1}} +; CHECK-LABEL: icmp_ugt: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: jbe LBB16_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB16_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = icmp ugt i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -208,9 +328,16 @@ } define i32 @icmp_uge(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_uge -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: jb {{LBB.+_1}} +; CHECK-LABEL: icmp_uge: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: jae LBB17_2 +; CHECK-NEXT: ## %bb.1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB17_2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq %1 = icmp uge i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -220,9 +347,16 @@ } define i32 @icmp_ult(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_ult -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: jae {{LBB.+_1}} +; CHECK-LABEL: icmp_ult: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: jae LBB18_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB18_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = icmp ult i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -232,9 +366,16 @@ } define i32 @icmp_ule(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_ule -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: ja {{LBB.+_1}} +; CHECK-LABEL: icmp_ule: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: jbe LBB19_2 +; CHECK-NEXT: ## %bb.1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB19_2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq %1 = icmp ule i32 %x, %y br i1 %1, label 
%bb1, label %bb2 bb2: @@ -244,9 +385,16 @@ } define i32 @icmp_sgt(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_sgt -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: jle {{LBB.+_1}} +; CHECK-LABEL: icmp_sgt: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: jle LBB20_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB20_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = icmp sgt i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -256,9 +404,16 @@ } define i32 @icmp_sge(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_sge -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: jl {{LBB.+_1}} +; CHECK-LABEL: icmp_sge: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: jge LBB21_2 +; CHECK-NEXT: ## %bb.1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB21_2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq %1 = icmp sge i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -268,9 +423,16 @@ } define i32 @icmp_slt(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_slt -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: jge {{LBB.+_1}} +; CHECK-LABEL: icmp_slt: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: jge LBB22_1 +; CHECK-NEXT: ## %bb.2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB22_1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq %1 = icmp slt i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -280,9 +442,16 @@ } define i32 @icmp_sle(i32 %x, i32 %y) { -; CHECK-LABEL: icmp_sle -; CHECK: cmpl %esi, %edi -; CHECK-NEXT: jg {{LBB.+_1}} +; CHECK-LABEL: icmp_sle: +; CHECK: ## %bb.0: +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: jle LBB23_2 +; CHECK-NEXT: ## %bb.1: ## %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB23_2: ## %bb1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq %1 = icmp sle i32 %x, %y br i1 %1, label %bb1, label %bb2 bb2: diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll 
b/llvm/test/CodeGen/X86/funnel-shift.ll --- a/llvm/test/CodeGen/X86/funnel-shift.ll +++ b/llvm/test/CodeGen/X86/funnel-shift.ll @@ -941,17 +941,17 @@ ; X32-SSE2-NEXT: shldl $24, %ebx, %edi ; X32-SSE2-NEXT: xorl %eax, %edi ; X32-SSE2-NEXT: orl %edi, %ecx -; X32-SSE2-NEXT: jne .LBB44_1 -; X32-SSE2-NEXT: # %bb.2: +; X32-SSE2-NEXT: je .LBB44_2 +; X32-SSE2-NEXT: # %bb.1: ; X32-SSE2-NEXT: popl %esi ; X32-SSE2-NEXT: popl %edi ; X32-SSE2-NEXT: popl %ebx -; X32-SSE2-NEXT: jmp _Z3foov # TAILCALL -; X32-SSE2-NEXT: .LBB44_1: +; X32-SSE2-NEXT: retl +; X32-SSE2-NEXT: .LBB44_2: ; X32-SSE2-NEXT: popl %esi ; X32-SSE2-NEXT: popl %edi ; X32-SSE2-NEXT: popl %ebx -; X32-SSE2-NEXT: retl +; X32-SSE2-NEXT: jmp _Z3foov # TAILCALL ; ; X64-AVX2-LABEL: PR45265: ; X64-AVX2: # %bb.0: @@ -964,11 +964,11 @@ ; X64-AVX2-NEXT: movq (%rsi,%rcx,4), %rcx ; X64-AVX2-NEXT: shrdq $40, %rdi, %rcx ; X64-AVX2-NEXT: cmpq %rax, %rcx -; X64-AVX2-NEXT: jne .LBB44_1 -; X64-AVX2-NEXT: # %bb.2: -; X64-AVX2-NEXT: jmp _Z3foov # TAILCALL -; X64-AVX2-NEXT: .LBB44_1: +; X64-AVX2-NEXT: je .LBB44_2 +; X64-AVX2-NEXT: # %bb.1: ; X64-AVX2-NEXT: retq +; X64-AVX2-NEXT: .LBB44_2: +; X64-AVX2-NEXT: jmp _Z3foov # TAILCALL %3 = sext i32 %0 to i64 %4 = getelementptr inbounds %struct.S, %struct.S* %1, i64 %3 %5 = bitcast %struct.S* %4 to i88* diff --git a/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll b/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll --- a/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll +++ b/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll @@ -1,59 +1,183 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple x86_64-unknown-unknown -exception-model sjlj -verify-machineinstrs=0 < %s | FileCheck %s --check-prefix=NUM ; RUN: llc -mtriple x86_64-unknown-unknown -exception-model sjlj -verify-machineinstrs=0 < %s | FileCheck %s --check-prefix=SJLJ -; NUM-COUNT-3: endbr64 - -;SJLJ: main: # @main -;SJLJ-NEXT: .Lfunc_begin0: -;SJLJ-NEXT: # %bb.0: # 
%entry -;SJLJ-NEXT: endbr64 -;SJLJ-NEXT: pushq %rbp -;SJLJ: callq _Unwind_SjLj_Register -;SJLJ-NEXT: .Ltmp0: -;SJLJ-NEXT: callq _Z3foov -;SJLJ-NEXT: .Ltmp1: -;SJLJ-NEXT: # %bb.1: # %invoke.cont -;SJLJ-NEXT: movl -;SJLJ-NEXT: .LBB0_7: # %return -;SJLJ: callq _Unwind_SjLj_Unregister -;SJLJ: retq -;SJLJ-NEXT: .LBB0_9: -;SJLJ-NEXT: endbr64 -;SJLJ-NEXT: movl -;SJLJ-NEXT: cmpl -;SJLJ-NEXT: jb .LBB0_10 -;SJLJ-NEXT: # %bb.11: -;SJLJ-NEXT: ud2 -;SJLJ-NEXT: .LBB0_10: -;SJLJ-NEXT: leaq .LJTI0_0(%rip), %rcx -;SJLJ-NEXT: jmpq *(%rcx,%rax,8) -;SJLJ-NEXT: .LBB0_2: # %lpad -;SJLJ-NEXT: .Ltmp2: -;SJLJ-NEXT: endbr64 -;SJLJ: jne .LBB0_4 -;SJLJ-NEXT: # %bb.3: # %catch3 -;SJLJ: callq __cxa_begin_catch -;SJLJ: jmp .LBB0_6 -;SJLJ-NEXT: .LBB0_4: # %catch.fallthrough -;SJLJ-NEXT: cmpl -;SJLJ-NEXT: jne .LBB0_8 -;SJLJ-NEXT: # %bb.5: # %catch -;SJLJ: callq __cxa_begin_catch -;SJLJ: cmpb -;SJLJ-NEXT: .LBB0_6: # %return -;SJLJ: callq __cxa_end_catch -;SJLJ-NEXT: jmp .LBB0_7 -;SJLJ-NEXT: .LBB0_8: # %eh.resume -;SJLJ-NEXT: movl -;SJLJ-NEXT: .Lfunc_end0: -;SJLJ: .LJTI0_0: -;SJLJ-NEXT: .quad .LBB0_2 - @_ZTIi = external dso_local constant i8* @_ZTIc = external dso_local constant i8* ; Function Attrs: noinline norecurse optnone uwtable define dso_local i32 @main() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) { +; NUM-LABEL: main: +; NUM: # %bb.0: # %entry +; NUM-NEXT: endbr64 +; NUM-NEXT: pushq %rbp +; NUM-NEXT: movq %rsp, %rbp +; NUM-NEXT: pushq %r15 +; NUM-NEXT: pushq %r14 +; NUM-NEXT: pushq %r13 +; NUM-NEXT: pushq %r12 +; NUM-NEXT: pushq %rbx +; NUM-NEXT: subq $120, %rsp +; NUM-NEXT: movl $0, -44(%rbp) +; NUM-NEXT: movq $__gxx_personality_sj0, -120(%rbp) +; NUM-NEXT: movq $GCC_except_table0, -112(%rbp) +; NUM-NEXT: movq %rbp, -104(%rbp) +; NUM-NEXT: movq %rsp, -88(%rbp) +; NUM-NEXT: movq $.LBB0_9, -96(%rbp) +; NUM-NEXT: movl $1, -144(%rbp) +; NUM-NEXT: leaq -152(%rbp), %rdi +; NUM-NEXT: callq _Unwind_SjLj_Register +; NUM-NEXT: .Ltmp0: +; NUM-NEXT: callq _Z3foov +; 
NUM-NEXT: .Ltmp1: +; NUM-NEXT: # %bb.1: # %invoke.cont +; NUM-NEXT: movl $1, -44(%rbp) +; NUM-NEXT: .LBB0_7: # %return +; NUM-NEXT: movl -44(%rbp), %ebx +; NUM-NEXT: leaq -152(%rbp), %rdi +; NUM-NEXT: callq _Unwind_SjLj_Unregister +; NUM-NEXT: movl %ebx, %eax +; NUM-NEXT: addq $120, %rsp +; NUM-NEXT: popq %rbx +; NUM-NEXT: popq %r12 +; NUM-NEXT: popq %r13 +; NUM-NEXT: popq %r14 +; NUM-NEXT: popq %r15 +; NUM-NEXT: popq %rbp +; NUM-NEXT: retq +; NUM-NEXT: .LBB0_9: +; NUM-NEXT: endbr64 +; NUM-NEXT: movl -144(%rbp), %eax +; NUM-NEXT: cmpl $1, %eax +; NUM-NEXT: jb .LBB0_10 +; NUM-NEXT: # %bb.11: +; NUM-NEXT: ud2 +; NUM-NEXT: .LBB0_10: +; NUM-NEXT: leaq {{.*}}(%rip), %rcx +; NUM-NEXT: jmpq *(%rcx,%rax,8) +; NUM-NEXT: .LBB0_2: # %lpad +; NUM-NEXT: .Ltmp2: +; NUM-NEXT: endbr64 +; NUM-NEXT: movl -140(%rbp), %ecx +; NUM-NEXT: movl -136(%rbp), %eax +; NUM-NEXT: movq %rcx, -56(%rbp) +; NUM-NEXT: movl %eax, -64(%rbp) +; NUM-NEXT: cmpl $2, %eax +; NUM-NEXT: je .LBB0_3 +; NUM-NEXT: # %bb.4: # %catch.fallthrough +; NUM-NEXT: cmpl $1, %eax +; NUM-NEXT: jne .LBB0_8 +; NUM-NEXT: # %bb.5: # %catch +; NUM-NEXT: movq -56(%rbp), %rdi +; NUM-NEXT: movl $-1, -144(%rbp) +; NUM-NEXT: callq __cxa_begin_catch +; NUM-NEXT: movb (%rax), %al +; NUM-NEXT: movb %al, -45(%rbp) +; NUM-NEXT: xorl %ecx, %ecx +; NUM-NEXT: cmpb $3, %al +; NUM-NEXT: jmp .LBB0_6 +; NUM-NEXT: .LBB0_3: # %catch3 +; NUM-NEXT: movq -56(%rbp), %rdi +; NUM-NEXT: movl $-1, -144(%rbp) +; NUM-NEXT: callq __cxa_begin_catch +; NUM-NEXT: movl (%rax), %eax +; NUM-NEXT: movl %eax, -60(%rbp) +; NUM-NEXT: xorl %ecx, %ecx +; NUM-NEXT: cmpl $5, %eax +; NUM-NEXT: .LBB0_6: # %return +; NUM-NEXT: setne %cl +; NUM-NEXT: movl %ecx, -44(%rbp) +; NUM-NEXT: movl $-1, -144(%rbp) +; NUM-NEXT: callq __cxa_end_catch +; NUM-NEXT: jmp .LBB0_7 +; NUM-NEXT: .LBB0_8: # %eh.resume +; NUM-NEXT: movl $-1, -144(%rbp) +; +; SJLJ-LABEL: main: +; SJLJ: # %bb.0: # %entry +; SJLJ-NEXT: endbr64 +; SJLJ-NEXT: pushq %rbp +; SJLJ-NEXT: movq %rsp, %rbp +; SJLJ-NEXT: 
pushq %r15 +; SJLJ-NEXT: pushq %r14 +; SJLJ-NEXT: pushq %r13 +; SJLJ-NEXT: pushq %r12 +; SJLJ-NEXT: pushq %rbx +; SJLJ-NEXT: subq $120, %rsp +; SJLJ-NEXT: movl $0, -44(%rbp) +; SJLJ-NEXT: movq $__gxx_personality_sj0, -120(%rbp) +; SJLJ-NEXT: movq $GCC_except_table0, -112(%rbp) +; SJLJ-NEXT: movq %rbp, -104(%rbp) +; SJLJ-NEXT: movq %rsp, -88(%rbp) +; SJLJ-NEXT: movq $.LBB0_9, -96(%rbp) +; SJLJ-NEXT: movl $1, -144(%rbp) +; SJLJ-NEXT: leaq -152(%rbp), %rdi +; SJLJ-NEXT: callq _Unwind_SjLj_Register +; SJLJ-NEXT: .Ltmp0: +; SJLJ-NEXT: callq _Z3foov +; SJLJ-NEXT: .Ltmp1: +; SJLJ-NEXT: # %bb.1: # %invoke.cont +; SJLJ-NEXT: movl $1, -44(%rbp) +; SJLJ-NEXT: .LBB0_7: # %return +; SJLJ-NEXT: movl -44(%rbp), %ebx +; SJLJ-NEXT: leaq -152(%rbp), %rdi +; SJLJ-NEXT: callq _Unwind_SjLj_Unregister +; SJLJ-NEXT: movl %ebx, %eax +; SJLJ-NEXT: addq $120, %rsp +; SJLJ-NEXT: popq %rbx +; SJLJ-NEXT: popq %r12 +; SJLJ-NEXT: popq %r13 +; SJLJ-NEXT: popq %r14 +; SJLJ-NEXT: popq %r15 +; SJLJ-NEXT: popq %rbp +; SJLJ-NEXT: retq +; SJLJ-NEXT: .LBB0_9: +; SJLJ-NEXT: endbr64 +; SJLJ-NEXT: movl -144(%rbp), %eax +; SJLJ-NEXT: cmpl $1, %eax +; SJLJ-NEXT: jb .LBB0_10 +; SJLJ-NEXT: # %bb.11: +; SJLJ-NEXT: ud2 +; SJLJ-NEXT: .LBB0_10: +; SJLJ-NEXT: leaq {{.*}}(%rip), %rcx +; SJLJ-NEXT: jmpq *(%rcx,%rax,8) +; SJLJ-NEXT: .LBB0_2: # %lpad +; SJLJ-NEXT: .Ltmp2: +; SJLJ-NEXT: endbr64 +; SJLJ-NEXT: movl -140(%rbp), %ecx +; SJLJ-NEXT: movl -136(%rbp), %eax +; SJLJ-NEXT: movq %rcx, -56(%rbp) +; SJLJ-NEXT: movl %eax, -64(%rbp) +; SJLJ-NEXT: cmpl $2, %eax +; SJLJ-NEXT: je .LBB0_3 +; SJLJ-NEXT: # %bb.4: # %catch.fallthrough +; SJLJ-NEXT: cmpl $1, %eax +; SJLJ-NEXT: jne .LBB0_8 +; SJLJ-NEXT: # %bb.5: # %catch +; SJLJ-NEXT: movq -56(%rbp), %rdi +; SJLJ-NEXT: movl $-1, -144(%rbp) +; SJLJ-NEXT: callq __cxa_begin_catch +; SJLJ-NEXT: movb (%rax), %al +; SJLJ-NEXT: movb %al, -45(%rbp) +; SJLJ-NEXT: xorl %ecx, %ecx +; SJLJ-NEXT: cmpb $3, %al +; SJLJ-NEXT: jmp .LBB0_6 +; SJLJ-NEXT: .LBB0_3: # %catch3 +; SJLJ-NEXT: movq 
-56(%rbp), %rdi +; SJLJ-NEXT: movl $-1, -144(%rbp) +; SJLJ-NEXT: callq __cxa_begin_catch +; SJLJ-NEXT: movl (%rax), %eax +; SJLJ-NEXT: movl %eax, -60(%rbp) +; SJLJ-NEXT: xorl %ecx, %ecx +; SJLJ-NEXT: cmpl $5, %eax +; SJLJ-NEXT: .LBB0_6: # %return +; SJLJ-NEXT: setne %cl +; SJLJ-NEXT: movl %ecx, -44(%rbp) +; SJLJ-NEXT: movl $-1, -144(%rbp) +; SJLJ-NEXT: callq __cxa_end_catch +; SJLJ-NEXT: jmp .LBB0_7 +; SJLJ-NEXT: .LBB0_8: # %eh.resume +; SJLJ-NEXT: movl $-1, -144(%rbp) entry: %retval = alloca i32, align 4 %exn.slot = alloca i8* diff --git a/llvm/test/CodeGen/X86/jump_sign.ll b/llvm/test/CodeGen/X86/jump_sign.ll --- a/llvm/test/CodeGen/X86/jump_sign.ll +++ b/llvm/test/CodeGen/X86/jump_sign.ll @@ -139,11 +139,12 @@ ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: subl %edx, %eax -; CHECK-NEXT: jne .LBB8_2 -; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: je .LBB8_1 +; CHECK-NEXT: # %bb.2: # %if.else +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB8_1: # %if.then ; CHECK-NEXT: cmpl %ecx, %edx ; CHECK-NEXT: cmovlel %ecx, %eax -; CHECK-NEXT: .LBB8_2: # %if.else ; CHECK-NEXT: retl %cmp = icmp eq i32 %b, %a %sub = sub nsw i32 %a, %b @@ -329,12 +330,13 @@ ; CHECK-NEXT: movl (%edx), %ecx ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: subl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: jl .LBB15_2 -; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: jge .LBB15_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB15_1: # %if.end ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %ecx, (%edx) ; CHECK-NEXT: addl %ecx, %eax -; CHECK-NEXT: .LBB15_2: # %return ; CHECK-NEXT: retl entry: %0 = load i32, i32* %offset, align 8 diff --git a/llvm/test/CodeGen/X86/lsr-negative-stride.ll b/llvm/test/CodeGen/X86/lsr-negative-stride.ll --- a/llvm/test/CodeGen/X86/lsr-negative-stride.ll +++ b/llvm/test/CodeGen/X86/lsr-negative-stride.ll @@ -19,11 +19,7 @@ ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; 
CHECK-NEXT: cmpl %ecx, %edx -; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: retl -; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: je .LBB0_1 ; CHECK-NEXT: .LBB0_2: # %bb.outer ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_3 Depth 2 @@ -49,6 +45,9 @@ ; CHECK-NEXT: jne .LBB0_2 ; CHECK-NEXT: .LBB0_6: # %bb17 ; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: retl entry: %tmp1434 = icmp eq i32 %a, %b ; [#uses=1] br i1 %tmp1434, label %bb17, label %bb.outer diff --git a/llvm/test/CodeGen/X86/machine-cse.ll b/llvm/test/CodeGen/X86/machine-cse.ll --- a/llvm/test/CodeGen/X86/machine-cse.ll +++ b/llvm/test/CodeGen/X86/machine-cse.ll @@ -110,10 +110,11 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: cmpl %esi, %edi -; CHECK-NEXT: ja .LBB2_2 -; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: jbe .LBB2_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB2_1: # %if.end ; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: .LBB2_2: # %return ; CHECK-NEXT: retq entry: %cmp = icmp ugt i32 %a, %b diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll --- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll +++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll @@ -239,19 +239,19 @@ ; X86-NEXT: rolw $8, %dx ; X86-NEXT: rolw $8, %si ; X86-NEXT: cmpw %si, %dx -; X86-NEXT: jne .LBB9_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 2(%eax), %eax -; X86-NEXT: movzbl 2(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl -; X86-NEXT: .LBB9_3: # %res_block +; X86-NEXT: je .LBB9_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB9_1: # %loadbb1 +; X86-NEXT: movzbl 2(%eax), %eax +; X86-NEXT: movzbl 2(%ecx), %ecx +; X86-NEXT: subl 
%ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl ; ; X64-LABEL: length3: ; X64: # %bb.0: @@ -260,16 +260,16 @@ ; X64-NEXT: rolw $8, %ax ; X64-NEXT: rolw $8, %cx ; X64-NEXT: cmpw %cx, %ax -; X64-NEXT: jne .LBB9_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 2(%rdi), %eax -; X64-NEXT: movzbl 2(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB9_3: # %res_block +; X64-NEXT: je .LBB9_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB9_1: # %loadbb1 +; X64-NEXT: movzbl 2(%rdi), %eax +; X64-NEXT: movzbl 2(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind ret i32 %m @@ -454,19 +454,19 @@ ; X86-NEXT: bswapl %edx ; X86-NEXT: bswapl %esi ; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB16_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 4(%eax), %eax -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl -; X86-NEXT: .LBB16_3: # %res_block +; X86-NEXT: je .LBB16_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB16_1: # %loadbb1 +; X86-NEXT: movzbl 4(%eax), %eax +; X86-NEXT: movzbl 4(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl ; ; X64-LABEL: length5: ; X64: # %bb.0: @@ -475,16 +475,16 @@ ; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: jne .LBB16_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 4(%rdi), %eax -; X64-NEXT: movzbl 4(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB16_3: # %res_block +; X64-NEXT: je .LBB16_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: 
.LBB16_1: # %loadbb1 +; X64-NEXT: movzbl 4(%rdi), %eax +; X64-NEXT: movzbl 4(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind ret i32 %m @@ -530,16 +530,16 @@ ; X86-NEXT: bswapl %edx ; X86-NEXT: bswapl %esi ; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB18_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 4(%eax), %eax -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB18_2 -; X86-NEXT: .LBB18_3: # %res_block +; X86-NEXT: je .LBB18_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: jmp .LBB18_2 +; X86-NEXT: .LBB18_1: # %loadbb1 +; X86-NEXT: movzbl 4(%eax), %eax +; X86-NEXT: movzbl 4(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: .LBB18_2: # %endblock ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax @@ -553,20 +553,20 @@ ; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: jne .LBB18_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 4(%rdi), %eax -; X64-NEXT: movzbl 4(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq -; X64-NEXT: .LBB18_3: # %res_block +; X64-NEXT: je .LBB18_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB18_1: # %loadbb1 +; X64-NEXT: movzbl 4(%rdi), %eax +; X64-NEXT: movzbl 4(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind %c = icmp slt i32 %m, 0 @@ -577,53 +577,56 @@ ; X86-LABEL: length7: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), 
%eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB19_2 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB19_4 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 3(%esi), %ecx -; X86-NEXT: movl 3(%eax), %edx +; X86-NEXT: movl 3(%esi), %eax +; X86-NEXT: movl 3(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB19_3 -; X86-NEXT: .LBB19_2: # %res_block +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB19_2 +; X86-NEXT: .LBB19_4: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB19_2: ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax -; X86-NEXT: .LBB19_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: length7: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: movl (%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: jne .LBB19_2 +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: jne .LBB19_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 3(%rdi), %ecx -; X64-NEXT: movl 3(%rsi), %edx +; X64-NEXT: movl 3(%rdi), %eax +; X64-NEXT: movl 3(%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: je .LBB19_3 -; X64-NEXT: .LBB19_2: # %res_block +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: je .LBB19_2 +; X64-NEXT: .LBB19_4: # 
%res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB19_2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB19_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 7) nounwind ret i32 %m @@ -660,55 +663,60 @@ ; X86-LABEL: length7_lt: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB21_2 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB21_4 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 3(%esi), %ecx -; X86-NEXT: movl 3(%eax), %edx +; X86-NEXT: movl 3(%esi), %eax +; X86-NEXT: movl 3(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB21_3 -; X86-NEXT: .LBB21_2: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB21_2 +; X86-NEXT: .LBB21_4: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax ; X86-NEXT: .LBB21_3: # %endblock ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB21_2: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: jmp .LBB21_3 ; ; X64-LABEL: length7_lt: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: movl (%rsi), 
%ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: cmpl %ecx, %eax ; X64-NEXT: jne .LBB21_2 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 3(%rdi), %ecx -; X64-NEXT: movl 3(%rsi), %edx +; X64-NEXT: movl 3(%rdi), %eax +; X64-NEXT: movl 3(%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: cmpl %ecx, %eax ; X64-NEXT: je .LBB21_3 ; X64-NEXT: .LBB21_2: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB21_3: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB21_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -721,28 +729,30 @@ ; X86-LABEL: length8: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB22_2 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB22_4 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx +; X86-NEXT: movl 4(%esi), %eax +; X86-NEXT: movl 4(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB22_3 -; X86-NEXT: .LBB22_2: # %res_block +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB22_2 +; X86-NEXT: .LBB22_4: # %res_block +; X86-NEXT: 
xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB22_2: ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax -; X86-NEXT: .LBB22_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -944,60 +954,63 @@ ; X86-LABEL: length12: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB29_3 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB29_5 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx +; X86-NEXT: movl 4(%esi), %eax +; X86-NEXT: movl 4(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB29_3 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB29_5 ; X86-NEXT: # %bb.2: # %loadbb2 -; X86-NEXT: movl 8(%esi), %ecx -; X86-NEXT: movl 8(%eax), %edx +; X86-NEXT: movl 8(%esi), %eax +; X86-NEXT: movl 8(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB29_4 -; X86-NEXT: .LBB29_3: # %res_block +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB29_3 +; X86-NEXT: .LBB29_5: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB29_3: ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax -; X86-NEXT: .LBB29_4: # 
%endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: length12: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB29_2 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB29_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 8(%rdi), %ecx -; X64-NEXT: movl 8(%rsi), %edx +; X64-NEXT: movl 8(%rdi), %eax +; X64-NEXT: movl 8(%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB29_3 -; X64-NEXT: .LBB29_2: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB29_2 +; X64-NEXT: .LBB29_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB29_2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB29_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind ret i32 %m @@ -1116,67 +1129,70 @@ ; X86-LABEL: length16: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB33_4 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB33_6 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx +; X86-NEXT: movl 4(%esi), %eax +; X86-NEXT: movl 4(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; 
X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB33_4 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB33_6 ; X86-NEXT: # %bb.2: # %loadbb2 -; X86-NEXT: movl 8(%esi), %ecx -; X86-NEXT: movl 8(%eax), %edx +; X86-NEXT: movl 8(%esi), %eax +; X86-NEXT: movl 8(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB33_4 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB33_6 ; X86-NEXT: # %bb.3: # %loadbb3 -; X86-NEXT: movl 12(%esi), %ecx -; X86-NEXT: movl 12(%eax), %edx +; X86-NEXT: movl 12(%esi), %eax +; X86-NEXT: movl 12(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB33_5 -; X86-NEXT: .LBB33_4: # %res_block +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB33_4 +; X86-NEXT: .LBB33_6: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB33_4: ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax -; X86-NEXT: .LBB33_5: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: length16: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB33_2 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB33_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB33_3 -; X64-NEXT: .LBB33_2: # %res_block +; X64-NEXT: cmpq %rcx, 
%rax +; X64-NEXT: je .LBB33_2 +; X64-NEXT: .LBB33_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB33_2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB33_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind ret i32 %m @@ -1291,69 +1307,74 @@ ; X86-LABEL: length16_lt: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB35_4 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB35_6 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx +; X86-NEXT: movl 4(%esi), %eax +; X86-NEXT: movl 4(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB35_4 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB35_6 ; X86-NEXT: # %bb.2: # %loadbb2 -; X86-NEXT: movl 8(%esi), %ecx -; X86-NEXT: movl 8(%eax), %edx +; X86-NEXT: movl 8(%esi), %eax +; X86-NEXT: movl 8(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB35_4 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB35_6 ; X86-NEXT: # %bb.3: # %loadbb3 -; X86-NEXT: movl 12(%esi), %ecx -; X86-NEXT: movl 12(%eax), %edx +; X86-NEXT: movl 12(%esi), %eax +; X86-NEXT: movl 12(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB35_5 -; 
X86-NEXT: .LBB35_4: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB35_4 +; X86-NEXT: .LBB35_6: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax ; X86-NEXT: .LBB35_5: # %endblock ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB35_4: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: jmp .LBB35_5 ; ; X64-LABEL: length16_lt: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB35_2 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: je .LBB35_3 ; X64-NEXT: .LBB35_2: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB35_3: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB35_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -1373,39 +1394,41 @@ ; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx ; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: jne .LBB36_4 +; X86-NEXT: jne .LBB36_6 ; X86-NEXT: # %bb.1: # %loadbb1 ; X86-NEXT: movl 4(%esi), %eax ; 
X86-NEXT: movl 4(%edx), %ecx ; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx ; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: jne .LBB36_4 +; X86-NEXT: jne .LBB36_6 ; X86-NEXT: # %bb.2: # %loadbb2 ; X86-NEXT: movl 8(%esi), %eax ; X86-NEXT: movl 8(%edx), %ecx ; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx ; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: jne .LBB36_4 +; X86-NEXT: jne .LBB36_6 ; X86-NEXT: # %bb.3: # %loadbb3 ; X86-NEXT: movl 12(%esi), %eax ; X86-NEXT: movl 12(%edx), %ecx ; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: xorl %edx, %edx ; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: je .LBB36_5 -; X86-NEXT: .LBB36_4: # %res_block +; X86-NEXT: je .LBB36_4 +; X86-NEXT: .LBB36_6: # %res_block ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: cmpl %ecx, %eax ; X86-NEXT: setae %dl -; X86-NEXT: leal -1(%edx,%edx), %edx +; X86-NEXT: leal -1(%edx,%edx), %eax ; X86-NEXT: .LBB36_5: # %endblock -; X86-NEXT: testl %edx, %edx +; X86-NEXT: testl %eax, %eax ; X86-NEXT: setg %al ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB36_4: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: jmp .LBB36_5 ; ; X64-LABEL: length16_gt: ; X64: # %bb.0: @@ -1414,24 +1437,26 @@ ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB36_2 +; X64-NEXT: jne .LBB36_4 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rax ; X64-NEXT: movq 8(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: je .LBB36_3 -; X64-NEXT: .LBB36_2: # %res_block +; X64-NEXT: je .LBB36_2 +; X64-NEXT: .LBB36_4: # %res_block ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: setae %dl -; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: leal -1(%rdx,%rdx), %eax ; X64-NEXT: .LBB36_3: # %endblock -; X64-NEXT: testl %edx, %edx +; X64-NEXT: testl %eax, %eax ; X64-NEXT: setg %al ; X64-NEXT: retq +; X64-NEXT: .LBB36_2: +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: jmp .LBB36_3 %call = tail call i32 
@memcmp(i8* %x, i8* %y, i64 16) nounwind %cmp = icmp sgt i32 %call, 0 ret i1 %cmp @@ -1549,33 +1574,34 @@ ; ; X64-LABEL: length24: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB38_3 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB38_5 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB38_3 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB38_5 ; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx +; X64-NEXT: movq 16(%rdi), %rax +; X64-NEXT: movq 16(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB38_4 -; X64-NEXT: .LBB38_3: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB38_3 +; X64-NEXT: .LBB38_5: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB38_3: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB38_4: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 24) nounwind ret i32 %m @@ -1708,33 +1734,36 @@ ; ; X64-LABEL: length24_lt: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB40_3 ; 
X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB40_3 ; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx +; X64-NEXT: movq 16(%rdi), %rax +; X64-NEXT: movq 16(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: je .LBB40_4 ; X64-NEXT: .LBB40_3: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB40_4: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB40_4: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -1763,31 +1792,33 @@ ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB41_3 +; X64-NEXT: jne .LBB41_5 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rax ; X64-NEXT: movq 8(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB41_3 +; X64-NEXT: jne .LBB41_5 ; X64-NEXT: # %bb.2: # %loadbb2 ; X64-NEXT: movq 16(%rdi), %rax ; X64-NEXT: movq 16(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: je .LBB41_4 -; X64-NEXT: .LBB41_3: # %res_block +; X64-NEXT: je .LBB41_3 +; X64-NEXT: .LBB41_5: # %res_block ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: setae %dl -; X64-NEXT: leal -1(%rdx,%rdx), %edx 
+; X64-NEXT: leal -1(%rdx,%rdx), %eax ; X64-NEXT: .LBB41_4: # %endblock -; X64-NEXT: testl %edx, %edx +; X64-NEXT: testl %eax, %eax ; X64-NEXT: setg %al ; X64-NEXT: retq +; X64-NEXT: .LBB41_3: +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: jmp .LBB41_4 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind %cmp = icmp sgt i32 %call, 0 ret i1 %cmp @@ -1907,40 +1938,41 @@ ; ; X64-LABEL: length31: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB43_4 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB43_6 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB43_4 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB43_6 ; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx +; X64-NEXT: movq 16(%rdi), %rax +; X64-NEXT: movq 16(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB43_4 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB43_6 ; X64-NEXT: # %bb.3: # %loadbb3 -; X64-NEXT: movq 23(%rdi), %rcx -; X64-NEXT: movq 23(%rsi), %rdx +; X64-NEXT: movq 23(%rdi), %rax +; X64-NEXT: movq 23(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB43_5 -; X64-NEXT: .LBB43_4: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB43_4 +; X64-NEXT: .LBB43_6: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: 
retq +; X64-NEXT: .LBB43_4: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB43_5: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 31) nounwind ret i32 %m @@ -2072,40 +2104,43 @@ ; ; X64-LABEL: length31_lt: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB45_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB45_4 ; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx +; X64-NEXT: movq 16(%rdi), %rax +; X64-NEXT: movq 16(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB45_4 ; X64-NEXT: # %bb.3: # %loadbb3 -; X64-NEXT: movq 23(%rdi), %rcx -; X64-NEXT: movq 23(%rsi), %rdx +; X64-NEXT: movq 23(%rdi), %rax +; X64-NEXT: movq 23(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: je .LBB45_5 ; X64-NEXT: .LBB45_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB45_5: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax 
-; X64-NEXT: .LBB45_5: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -2134,38 +2169,40 @@ ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB46_4 +; X64-NEXT: jne .LBB46_6 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rax ; X64-NEXT: movq 8(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB46_4 +; X64-NEXT: jne .LBB46_6 ; X64-NEXT: # %bb.2: # %loadbb2 ; X64-NEXT: movq 16(%rdi), %rax ; X64-NEXT: movq 16(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB46_4 +; X64-NEXT: jne .LBB46_6 ; X64-NEXT: # %bb.3: # %loadbb3 ; X64-NEXT: movq 23(%rdi), %rax ; X64-NEXT: movq 23(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: je .LBB46_5 -; X64-NEXT: .LBB46_4: # %res_block +; X64-NEXT: je .LBB46_4 +; X64-NEXT: .LBB46_6: # %res_block ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: setae %dl -; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: leal -1(%rdx,%rdx), %eax ; X64-NEXT: .LBB46_5: # %endblock -; X64-NEXT: testl %edx, %edx +; X64-NEXT: testl %eax, %eax ; X64-NEXT: setg %al ; X64-NEXT: retq +; X64-NEXT: .LBB46_4: +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: jmp .LBB46_5 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 31) nounwind %cmp = icmp sgt i32 %call, 0 ret i1 %cmp @@ -2396,40 +2433,41 @@ ; ; X64-LABEL: length32: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB49_4 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB49_6 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; 
X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB49_4 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB49_6 ; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx +; X64-NEXT: movq 16(%rdi), %rax +; X64-NEXT: movq 16(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB49_4 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB49_6 ; X64-NEXT: # %bb.3: # %loadbb3 -; X64-NEXT: movq 24(%rdi), %rcx -; X64-NEXT: movq 24(%rsi), %rdx +; X64-NEXT: movq 24(%rdi), %rax +; X64-NEXT: movq 24(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB49_5 -; X64-NEXT: .LBB49_4: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB49_4 +; X64-NEXT: .LBB49_6: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB49_4: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB49_5: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind ret i32 %m @@ -2576,40 +2614,43 @@ ; ; X64-LABEL: length32_lt: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB51_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx 
-; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB51_4 ; X64-NEXT: # %bb.2: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx +; X64-NEXT: movq 16(%rdi), %rax +; X64-NEXT: movq 16(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB51_4 ; X64-NEXT: # %bb.3: # %loadbb3 -; X64-NEXT: movq 24(%rdi), %rcx -; X64-NEXT: movq 24(%rsi), %rdx +; X64-NEXT: movq 24(%rdi), %rax +; X64-NEXT: movq 24(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: je .LBB51_5 ; X64-NEXT: .LBB51_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB51_5: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB51_5: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -2638,38 +2679,40 @@ ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB52_4 +; X64-NEXT: jne .LBB52_6 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rax ; X64-NEXT: movq 8(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB52_4 +; X64-NEXT: jne .LBB52_6 ; X64-NEXT: # %bb.2: # %loadbb2 ; X64-NEXT: movq 16(%rdi), %rax ; X64-NEXT: movq 16(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB52_4 +; X64-NEXT: jne .LBB52_6 ; X64-NEXT: # %bb.3: # %loadbb3 ; X64-NEXT: movq 24(%rdi), %rax ; X64-NEXT: movq 24(%rsi), %rcx ; 
X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: je .LBB52_5 -; X64-NEXT: .LBB52_4: # %res_block +; X64-NEXT: je .LBB52_4 +; X64-NEXT: .LBB52_6: # %res_block ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: setae %dl -; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: leal -1(%rdx,%rdx), %eax ; X64-NEXT: .LBB52_5: # %endblock -; X64-NEXT: testl %edx, %edx +; X64-NEXT: testl %eax, %eax ; X64-NEXT: setg %al ; X64-NEXT: retq +; X64-NEXT: .LBB52_4: +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: jmp .LBB52_5 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind %cmp = icmp sgt i32 %call, 0 ret i1 %cmp diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll --- a/llvm/test/CodeGen/X86/memcmp-optsize.ll +++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll @@ -120,16 +120,16 @@ ; X86-NEXT: rolw $8, %dx ; X86-NEXT: rolw $8, %si ; X86-NEXT: cmpw %si, %dx -; X86-NEXT: jne .LBB4_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 2(%eax), %eax -; X86-NEXT: movzbl 2(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB4_2 -; X86-NEXT: .LBB4_3: # %res_block +; X86-NEXT: je .LBB4_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: jmp .LBB4_2 +; X86-NEXT: .LBB4_1: # %loadbb1 +; X86-NEXT: movzbl 2(%eax), %eax +; X86-NEXT: movzbl 2(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: .LBB4_2: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -141,16 +141,16 @@ ; X64-NEXT: rolw $8, %ax ; X64-NEXT: rolw $8, %cx ; X64-NEXT: cmpw %cx, %ax -; X64-NEXT: jne .LBB4_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 2(%rdi), %eax -; X64-NEXT: movzbl 2(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB4_3: # %res_block +; X64-NEXT: je .LBB4_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal 
-1(%rax,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB4_1: # %loadbb1 +; X64-NEXT: movzbl 2(%rdi), %eax +; X64-NEXT: movzbl 2(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind ret i32 %m @@ -265,16 +265,16 @@ ; X86-NEXT: bswapl %edx ; X86-NEXT: bswapl %esi ; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB9_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 4(%eax), %eax -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB9_2 -; X86-NEXT: .LBB9_3: # %res_block +; X86-NEXT: je .LBB9_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: jmp .LBB9_2 +; X86-NEXT: .LBB9_1: # %loadbb1 +; X86-NEXT: movzbl 4(%eax), %eax +; X86-NEXT: movzbl 4(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: .LBB9_2: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -286,16 +286,16 @@ ; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: jne .LBB9_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 4(%rdi), %eax -; X64-NEXT: movzbl 4(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB9_3: # %res_block +; X64-NEXT: je .LBB9_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB9_1: # %loadbb1 +; X64-NEXT: movzbl 4(%rdi), %eax +; X64-NEXT: movzbl 4(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind ret i32 %m @@ -334,30 +334,32 @@ ; X86-LABEL: length8: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: 
bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB11_2 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB11_4 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx +; X86-NEXT: movl 4(%esi), %eax +; X86-NEXT: movl 4(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB11_3 -; X86-NEXT: .LBB11_2: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB11_2 +; X86-NEXT: .LBB11_4: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax ; X86-NEXT: .LBB11_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB11_2: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: jmp .LBB11_3 ; ; X64-LABEL: length8: ; X64: # %bb.0: @@ -461,26 +463,27 @@ ; ; X64-LABEL: length12: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB15_2 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB15_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 8(%rdi), %ecx -; X64-NEXT: movl 8(%rsi), %edx +; X64-NEXT: movl 8(%rdi), %eax +; X64-NEXT: movl 8(%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB15_3 -; X64-NEXT: .LBB15_2: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB15_2 +; X64-NEXT: .LBB15_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB15_2: ; 
X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB15_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind ret i32 %m @@ -501,26 +504,27 @@ ; ; X64-LABEL: length16: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB16_2 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB16_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB16_3 -; X64-NEXT: .LBB16_2: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB16_2 +; X64-NEXT: .LBB16_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB16_2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB16_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind ret i32 %m diff --git a/llvm/test/CodeGen/X86/memcmp-pgso.ll b/llvm/test/CodeGen/X86/memcmp-pgso.ll --- a/llvm/test/CodeGen/X86/memcmp-pgso.ll +++ b/llvm/test/CodeGen/X86/memcmp-pgso.ll @@ -120,19 +120,19 @@ ; X86-NEXT: rolw $8, %dx ; X86-NEXT: rolw $8, %si ; X86-NEXT: cmpw %si, %dx -; X86-NEXT: jne .LBB4_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 2(%eax), %eax -; X86-NEXT: movzbl 2(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB4_2 -; X86-NEXT: .LBB4_3: # %res_block +; X86-NEXT: je .LBB4_1 +; X86-NEXT: # %bb.3: # 
%res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: .LBB4_2: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB4_1: # %loadbb1 +; X86-NEXT: movzbl 2(%eax), %eax +; X86-NEXT: movzbl 2(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: jmp .LBB4_2 ; ; X64-LABEL: length3: ; X64: # %bb.0: @@ -141,16 +141,16 @@ ; X64-NEXT: rolw $8, %ax ; X64-NEXT: rolw $8, %cx ; X64-NEXT: cmpw %cx, %ax -; X64-NEXT: jne .LBB4_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 2(%rdi), %eax -; X64-NEXT: movzbl 2(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB4_3: # %res_block +; X64-NEXT: je .LBB4_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB4_1: # %loadbb1 +; X64-NEXT: movzbl 2(%rdi), %eax +; X64-NEXT: movzbl 2(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind ret i32 %m @@ -265,19 +265,19 @@ ; X86-NEXT: bswapl %edx ; X86-NEXT: bswapl %esi ; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB9_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 4(%eax), %eax -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB9_2 -; X86-NEXT: .LBB9_3: # %res_block +; X86-NEXT: je .LBB9_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: .LBB9_2: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB9_1: # %loadbb1 +; X86-NEXT: movzbl 4(%eax), %eax +; X86-NEXT: movzbl 4(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: jmp .LBB9_2 ; ; X64-LABEL: length5: ; X64: # %bb.0: @@ -286,16 +286,16 @@ ; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: jne .LBB9_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 4(%rdi), %eax -; X64-NEXT: movzbl 4(%rsi), 
%ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB9_3: # %res_block +; X64-NEXT: je .LBB9_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB9_1: # %loadbb1 +; X64-NEXT: movzbl 4(%rdi), %eax +; X64-NEXT: movzbl 4(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind ret i32 %m @@ -334,30 +334,32 @@ ; X86-LABEL: length8: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB11_2 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB11_3 -; X86-NEXT: .LBB11_2: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB11_1 +; X86-NEXT: .LBB11_4: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax ; X86-NEXT: .LBB11_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB11_1: # %loadbb1 +; X86-NEXT: movl 4(%esi), %eax +; X86-NEXT: movl 4(%edx), %ecx +; X86-NEXT: bswapl %eax +; X86-NEXT: bswapl %ecx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB11_4 +; X86-NEXT: # %bb.2: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: jmp .LBB11_3 ; ; X64-LABEL: length8: ; X64: # %bb.0: @@ -461,26 +463,27 @@ ; ; X64-LABEL: length12: ; X64: # %bb.0: -; X64-NEXT: movq 
(%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB15_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 8(%rdi), %ecx -; X64-NEXT: movl 8(%rsi), %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB15_1 +; X64-NEXT: .LBB15_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB15_1: # %loadbb1 +; X64-NEXT: movl 8(%rdi), %eax +; X64-NEXT: movl 8(%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB15_3 -; X64-NEXT: .LBB15_2: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB15_4 +; X64-NEXT: # %bb.2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB15_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind ret i32 %m @@ -501,26 +504,27 @@ ; ; X64-LABEL: length16: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB16_2 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB16_1 +; X64-NEXT: .LBB16_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB16_1: # %loadbb1 +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx +; X64-NEXT: cmpq 
%rcx, %rax +; X64-NEXT: jne .LBB16_4 +; X64-NEXT: # %bb.2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB16_3 -; X64-NEXT: .LBB16_2: # %res_block -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB16_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind ret i32 %m diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll --- a/llvm/test/CodeGen/X86/memcmp.ll +++ b/llvm/test/CodeGen/X86/memcmp.ll @@ -285,19 +285,19 @@ ; X86-NEXT: rolw $8, %dx ; X86-NEXT: rolw $8, %si ; X86-NEXT: cmpw %si, %dx -; X86-NEXT: jne .LBB11_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 2(%eax), %eax -; X86-NEXT: movzbl 2(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl -; X86-NEXT: .LBB11_3: # %res_block +; X86-NEXT: je .LBB11_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB11_1: # %loadbb1 +; X86-NEXT: movzbl 2(%eax), %eax +; X86-NEXT: movzbl 2(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl ; ; X64-LABEL: length3: ; X64: # %bb.0: @@ -306,16 +306,16 @@ ; X64-NEXT: rolw $8, %ax ; X64-NEXT: rolw $8, %cx ; X64-NEXT: cmpw %cx, %ax -; X64-NEXT: jne .LBB11_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 2(%rdi), %eax -; X64-NEXT: movzbl 2(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB11_3: # %res_block +; X64-NEXT: je .LBB11_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB11_1: # %loadbb1 +; X64-NEXT: movzbl 2(%rdi), %eax +; X64-NEXT: movzbl 2(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind ret i32 %m @@ 
-500,19 +500,19 @@ ; X86-NEXT: bswapl %edx ; X86-NEXT: bswapl %esi ; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB18_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 4(%eax), %eax -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl -; X86-NEXT: .LBB18_3: # %res_block +; X86-NEXT: je .LBB18_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB18_1: # %loadbb1 +; X86-NEXT: movzbl 4(%eax), %eax +; X86-NEXT: movzbl 4(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl ; ; X64-LABEL: length5: ; X64: # %bb.0: @@ -521,16 +521,16 @@ ; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: jne .LBB18_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 4(%rdi), %eax -; X64-NEXT: movzbl 4(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB18_3: # %res_block +; X64-NEXT: je .LBB18_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB18_1: # %loadbb1 +; X64-NEXT: movzbl 4(%rdi), %eax +; X64-NEXT: movzbl 4(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind ret i32 %m @@ -576,16 +576,16 @@ ; X86-NEXT: bswapl %edx ; X86-NEXT: bswapl %esi ; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: jne .LBB20_3 -; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movzbl 4(%eax), %eax -; X86-NEXT: movzbl 4(%ecx), %ecx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: jmp .LBB20_2 -; X86-NEXT: .LBB20_3: # %res_block +; X86-NEXT: je .LBB20_1 +; X86-NEXT: # %bb.3: # %res_block ; X86-NEXT: setae %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: jmp .LBB20_2 +; X86-NEXT: .LBB20_1: # %loadbb1 +; X86-NEXT: movzbl 4(%eax), %eax +; X86-NEXT: movzbl 
4(%ecx), %ecx +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: .LBB20_2: # %endblock ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax @@ -599,20 +599,20 @@ ; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax -; X64-NEXT: jne .LBB20_3 -; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movzbl 4(%rdi), %eax -; X64-NEXT: movzbl 4(%rsi), %ecx -; X64-NEXT: subl %ecx, %eax -; X64-NEXT: shrl $31, %eax -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: retq -; X64-NEXT: .LBB20_3: # %res_block +; X64-NEXT: je .LBB20_1 +; X64-NEXT: # %bb.3: # %res_block ; X64-NEXT: setae %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: leal -1(%rax,%rax), %eax ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB20_1: # %loadbb1 +; X64-NEXT: movzbl 4(%rdi), %eax +; X64-NEXT: movzbl 4(%rsi), %ecx +; X64-NEXT: subl %ecx, %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind %c = icmp slt i32 %m, 0 @@ -623,53 +623,56 @@ ; X86-LABEL: length7: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB21_2 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB21_4 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 3(%esi), %ecx -; X86-NEXT: movl 3(%eax), %edx +; X86-NEXT: movl 3(%esi), %eax +; X86-NEXT: movl 3(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB21_3 -; X86-NEXT: .LBB21_2: # %res_block +; X86-NEXT: cmpl %ecx, %eax +; 
X86-NEXT: je .LBB21_2 +; X86-NEXT: .LBB21_4: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB21_2: ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax -; X86-NEXT: .LBB21_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: length7: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: movl (%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: jne .LBB21_2 +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: jne .LBB21_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 3(%rdi), %ecx -; X64-NEXT: movl 3(%rsi), %edx +; X64-NEXT: movl 3(%rdi), %eax +; X64-NEXT: movl 3(%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: je .LBB21_3 -; X64-NEXT: .LBB21_2: # %res_block +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: je .LBB21_2 +; X64-NEXT: .LBB21_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB21_2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB21_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 7) nounwind ret i32 %m @@ -679,55 +682,60 @@ ; X86-LABEL: length7_lt: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl 
%ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB22_2 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB22_4 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 3(%esi), %ecx -; X86-NEXT: movl 3(%eax), %edx +; X86-NEXT: movl 3(%esi), %eax +; X86-NEXT: movl 3(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB22_3 -; X86-NEXT: .LBB22_2: # %res_block -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB22_2 +; X86-NEXT: .LBB22_4: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax ; X86-NEXT: .LBB22_3: # %endblock ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB22_2: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: jmp .LBB22_3 ; ; X64-LABEL: length7_lt: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: movl (%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: cmpl %ecx, %eax ; X64-NEXT: jne .LBB22_2 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 3(%rdi), %ecx -; X64-NEXT: movl 3(%rsi), %edx +; X64-NEXT: movl 3(%rdi), %eax +; X64-NEXT: movl 3(%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: cmpl %ecx, %eax ; X64-NEXT: je .LBB22_3 ; X64-NEXT: .LBB22_2: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB22_3: ; 
X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %edx, %ecx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB22_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -767,28 +775,30 @@ ; X86-LABEL: length8: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl (%esi), %eax +; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB24_2 +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: jne .LBB24_4 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx +; X86-NEXT: movl 4(%esi), %eax +; X86-NEXT: movl 4(%edx), %ecx +; X86-NEXT: bswapl %eax ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: je .LBB24_3 -; X86-NEXT: .LBB24_2: # %res_block +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: je .LBB24_2 +; X86-NEXT: .LBB24_4: # %res_block +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: setae %dl +; X86-NEXT: leal -1(%edx,%edx), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB24_2: ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: setae %al -; X86-NEXT: leal -1(%eax,%eax), %eax -; X86-NEXT: .LBB24_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -977,26 +987,27 @@ ; ; X64-LABEL: length12: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB31_2 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB31_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movl 
8(%rdi), %ecx -; X64-NEXT: movl 8(%rsi), %edx +; X64-NEXT: movl 8(%rdi), %eax +; X64-NEXT: movl 8(%rsi), %ecx +; X64-NEXT: bswapl %eax ; X64-NEXT: bswapl %ecx -; X64-NEXT: bswapl %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB31_3 -; X64-NEXT: .LBB31_2: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB31_2 +; X64-NEXT: .LBB31_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB31_2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB31_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind ret i32 %m @@ -1069,26 +1080,27 @@ ; ; X64-LABEL: length15: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB34_2 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB34_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 7(%rdi), %rcx -; X64-NEXT: movq 7(%rsi), %rdx +; X64-NEXT: movq 7(%rdi), %rax +; X64-NEXT: movq 7(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB34_3 -; X64-NEXT: .LBB34_2: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je .LBB34_2 +; X64-NEXT: .LBB34_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB34_2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB34_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 15) nounwind 
ret i32 %m @@ -1109,26 +1121,29 @@ ; ; X64-LABEL: length15_lt: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB35_2 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 7(%rdi), %rcx -; X64-NEXT: movq 7(%rsi), %rdx +; X64-NEXT: movq 7(%rdi), %rax +; X64-NEXT: movq 7(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: je .LBB35_3 ; X64-NEXT: .LBB35_2: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB35_3: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB35_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -1256,26 +1271,27 @@ ; ; X64-LABEL: length16: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB39_2 +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: jne .LBB39_4 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: je .LBB39_3 -; X64-NEXT: .LBB39_2: # %res_block +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: je 
.LBB39_2 +; X64-NEXT: .LBB39_4: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: retq +; X64-NEXT: .LBB39_2: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB39_3: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind ret i32 %m @@ -1385,26 +1401,29 @@ ; ; X64-LABEL: length16_lt: ; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB41_2 ; X64-NEXT: # %bb.1: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx +; X64-NEXT: movq 8(%rdi), %rax +; X64-NEXT: movq 8(%rsi), %rcx +; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: je .LBB41_3 ; X64-NEXT: .LBB41_2: # %res_block +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: cmpq %rcx, %rax +; X64-NEXT: setae %dl +; X64-NEXT: leal -1(%rdx,%rdx), %eax +; X64-NEXT: shrl $31, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; X64-NEXT: .LBB41_3: ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: setae %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: .LBB41_3: # %endblock ; X64-NEXT: shrl $31, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -1433,24 +1452,26 @@ ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: jne .LBB42_2 +; X64-NEXT: jne .LBB42_4 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rax ; X64-NEXT: movq 8(%rsi), %rcx ; X64-NEXT: bswapq %rax ; X64-NEXT: bswapq %rcx -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: 
cmpq %rcx, %rax -; X64-NEXT: je .LBB42_3 -; X64-NEXT: .LBB42_2: # %res_block +; X64-NEXT: je .LBB42_2 +; X64-NEXT: .LBB42_4: # %res_block ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: setae %dl -; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: leal -1(%rdx,%rdx), %eax ; X64-NEXT: .LBB42_3: # %endblock -; X64-NEXT: testl %edx, %edx +; X64-NEXT: testl %eax, %eax ; X64-NEXT: setg %al ; X64-NEXT: retq +; X64-NEXT: .LBB42_2: +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: jmp .LBB42_3 %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind %cmp = icmp sgt i32 %call, 0 ret i1 %cmp diff --git a/llvm/test/CodeGen/X86/neg_cmp.ll b/llvm/test/CodeGen/X86/neg_cmp.ll --- a/llvm/test/CodeGen/X86/neg_cmp.ll +++ b/llvm/test/CodeGen/X86/neg_cmp.ll @@ -10,11 +10,11 @@ ; CHECK-LABEL: neg_cmp: ; CHECK: # %bb.0: ; CHECK-NEXT: addl %esi, %edi -; CHECK-NEXT: jne .LBB0_1 -; CHECK-NEXT: # %bb.2: # %if.then -; CHECK-NEXT: jmp g # TAILCALL -; CHECK-NEXT: .LBB0_1: # %if.end +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_2: # %if.then +; CHECK-NEXT: jmp g # TAILCALL %sub = sub i32 0, %y %cmp = icmp eq i32 %x, %sub br i1 %cmp, label %if.then, label %if.end @@ -31,11 +31,11 @@ ; CHECK-LABEL: neg_cmp_commuted: ; CHECK: # %bb.0: ; CHECK-NEXT: addl %esi, %edi -; CHECK-NEXT: jne .LBB1_1 -; CHECK-NEXT: # %bb.2: # %if.then -; CHECK-NEXT: jmp g # TAILCALL -; CHECK-NEXT: .LBB1_1: # %if.end +; CHECK-NEXT: je .LBB1_2 +; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: retq +; CHECK-NEXT: .LBB1_2: # %if.then +; CHECK-NEXT: jmp g # TAILCALL %sub = sub i32 0, %y %cmp = icmp eq i32 %sub, %x br i1 %cmp, label %if.then, label %if.end diff --git a/llvm/test/CodeGen/X86/nobt.ll b/llvm/test/CodeGen/X86/nobt.ll --- a/llvm/test/CodeGen/X86/nobt.ll +++ b/llvm/test/CodeGen/X86/nobt.ll @@ -9,10 +9,11 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: # %bb.1: # %bb +; 
CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: # %bb.2: # %UnifiedReturnBlock +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_1: # %bb ; CHECK-NEXT: calll foo -; CHECK-NEXT: .LBB0_2: # %UnifiedReturnBlock ; CHECK-NEXT: retl entry: %tmp1 = and i32 %x, 1 @@ -34,10 +35,11 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB1_2 -; CHECK-NEXT: # %bb.1: # %bb +; CHECK-NEXT: je .LBB1_1 +; CHECK-NEXT: # %bb.2: # %UnifiedReturnBlock +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB1_1: # %bb ; CHECK-NEXT: calll foo -; CHECK-NEXT: .LBB1_2: # %UnifiedReturnBlock ; CHECK-NEXT: retl entry: %tmp1 = and i32 %x, 1 diff --git a/llvm/test/CodeGen/X86/pr29170.ll b/llvm/test/CodeGen/X86/pr29170.ll --- a/llvm/test/CodeGen/X86/pr29170.ll +++ b/llvm/test/CodeGen/X86/pr29170.ll @@ -11,8 +11,11 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB0_3 -; CHECK-NEXT: # %bb.1: # %go +; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %if.else +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_1: # %go ; CHECK-NEXT: movl $-1, %ecx ; CHECK-NEXT: movsbl b, %edx ; CHECK-NEXT: notl %ecx @@ -23,9 +26,6 @@ ; CHECK-NEXT: # %bb.2: # %if.then ; CHECK-NEXT: movl $42, %eax ; CHECK-NEXT: retl -; CHECK-NEXT: .LBB0_3: # %if.else -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: retl entry: %true = icmp eq i32 0, 0 %const = bitcast i64 -4294967296 to i64 diff --git a/llvm/test/CodeGen/X86/wide-integer-cmp.ll b/llvm/test/CodeGen/X86/wide-integer-cmp.ll --- a/llvm/test/CodeGen/X86/wide-integer-cmp.ll +++ b/llvm/test/CodeGen/X86/wide-integer-cmp.ll @@ -9,13 +9,13 @@ ; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: orl %ecx, %eax -; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: # %bb.1: # %bb1 -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: retl -; CHECK-NEXT: .LBB0_2: # %bb2 +; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: # %bb.2: # %bb2 ; CHECK-NEXT: movl $2, %eax ; 
CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_1: # %bb1 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retl entry: %cmp = icmp eq i64 %a, %b br i1 %cmp, label %bb1, label %bb2 @@ -55,13 +55,13 @@ ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: jb .LBB2_2 -; CHECK-NEXT: # %bb.1: # %bb1 -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: retl -; CHECK-NEXT: .LBB2_2: # %bb2 +; CHECK-NEXT: jae .LBB2_1 +; CHECK-NEXT: # %bb.2: # %bb2 ; CHECK-NEXT: movl $2, %eax ; CHECK-NEXT: retl +; CHECK-NEXT: .LBB2_1: # %bb1 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retl entry: %cmp = icmp ule i64 %a, %b br i1 %cmp, label %bb1, label %bb2