diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -472,6 +472,10 @@
     return false;
   }
 
+  /// Return true if the instruction generated for an equality comparison is
+  /// folded with the instruction generated for a signed comparison.
+  virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; }
+
   /// Return true if it is safe to transform an integer-domain bitwise operation
   /// into the equivalent floating-point operation. This should be set to true
   /// if the target has IEEE-754-compliant fabs/fneg operations for the input
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -222,6 +222,10 @@
                          cl::init(true),
                          cl::desc("Enable splitting large offset of GEP."));
 
+static cl::opt<bool> EnableICMP_EQToICMP_ST(
+    "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
+    cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
+
 namespace {
 
 enum ExtType {
@@ -1408,6 +1412,93 @@
   return MadeChange;
 }
 
+/// For pattern like:
+///
+///   DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
+///   ...
+/// DomBB:
+///   ...
+///   br DomCond, TrueBB, CmpBB
+/// CmpBB: (with DomBB being the single predecessor)
+///   ...
+///   Cmp = icmp eq CmpOp0, CmpOp1
+///   ...
+///
+/// This would use two comparisons on targets where the lowering of icmp
+/// sgt/slt differs from the lowering of icmp eq (PowerPC). This function tries
+/// to convert 'Cmp = icmp eq CmpOp0, CmpOp1' to
+/// 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'. After that, DomCond and Cmp can share
+/// the same comparison, which eliminates one comparison.
+///
+/// Return true if any changes are made.
+static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
+                                       const TargetLowering &TLI) {
+  if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
+    return false;
+
+  ICmpInst::Predicate Pred = Cmp->getPredicate();
+  if (Pred != ICmpInst::ICMP_EQ)
+    return false;
+
+  // If icmp eq has users other than BranchInst and SelectInst, converting it to
+  // icmp slt/sgt would introduce more redundant LLVM IR.
+  for (User *U : Cmp->users()) {
+    if (isa<BranchInst>(U))
+      continue;
+    if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
+      continue;
+    return false;
+  }
+
+  // This is a cheap/incomplete check for dominance - just match a single
+  // predecessor with a conditional branch.
+  BasicBlock *CmpBB = Cmp->getParent();
+  BasicBlock *DomBB = CmpBB->getSinglePredecessor();
+  if (!DomBB)
+    return false;
+
+  // We want to ensure that the only way control gets to the comparison of
+  // interest is that a less/greater than comparison on the same operands is
+  // false.
+  Value *DomCond;
+  BasicBlock *TrueBB, *FalseBB;
+  if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
+    return false;
+  if (CmpBB != FalseBB)
+    return false;
+
+  Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
+  ICmpInst::Predicate DomPred;
+  if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
+    return false;
+  if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
+    return false;
+
+  // Convert the equality comparison to the opposite of the dominating
+  // comparison and swap the direction for all branch/select users.
+  // We have conceptually converted:
+  // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
+  // to
+  // Res = (a < b) ? <LT_RES> : (a > b)  ? <GT_RES> : <EQ_RES>;
+  // And similarly for branches.
+  for (User *U : Cmp->users()) {
+    if (auto *BI = dyn_cast<BranchInst>(U)) {
+      assert(BI->isConditional() && "Must be conditional");
+      BI->swapSuccessors();
+      continue;
+    }
+    if (auto *SI = dyn_cast<SelectInst>(U)) {
+      // Swap the true/false values and keep the profile metadata in sync.
+      SI->swapValues();
+      SI->swapProfMetadata();
+      continue;
+    }
+    llvm_unreachable("Must be a branch or a select");
+  }
+  Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
+  return true;
+}
+
 bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
   if (sinkCmpExpression(Cmp, *TLI))
     return true;
@@ -1418,6 +1509,9 @@
   if (combineToUSubWithOverflow(Cmp, ModifiedDT))
     return true;
 
+  if (foldICmpWithDominatingICmp(Cmp, *TLI))
+    return true;
+
   return false;
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -650,6 +650,10 @@
       return true;
     }
 
+    bool isEqualityCmpFoldedWithSignedCmp() const override {
+      return false;
+    }
+
     bool hasAndNotCompare(SDValue) const override {
       return true;
     }
diff --git a/llvm/test/CodeGen/AArch64/use-cr-result-of-dom-icmp-st.ll b/llvm/test/CodeGen/AArch64/use-cr-result-of-dom-icmp-st.ll
--- a/llvm/test/CodeGen/AArch64/use-cr-result-of-dom-icmp-st.ll
+++ b/llvm/test/CodeGen/AArch64/use-cr-result-of-dom-icmp-st.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-unknown-unknown -O3 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-unknown-unknown -O3 -cgp-icmp-eq2icmp-st -verify-machineinstrs < %s | FileCheck %s
 
 ; Test cases are generated from:
 ; long long NAME(PARAM a, PARAM b) {
@@ -24,7 +24,7 @@
 ; CHECK-NEXT: mov x0, x1
 ; CHECK-NEXT: ret
 ; CHECK-NEXT: .LBB0_2: // %if.end
-; CHECK-NEXT: csinc x8, x1, xzr, eq
+; CHECK-NEXT: csinc x8, x1, xzr, ge
 ; CHECK-NEXT: mul x0, x8, x0
 ; CHECK-NEXT: ret
 entry:
@@ -52,7 +52,7 @@
 ; CHECK-NEXT: ret
 ; CHECK-NEXT: .LBB1_2: // %if.end
 ; CHECK-NEXT: cmn
x8, #1 // =1 -; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: csinc x8, x1, xzr, ge ; CHECK-NEXT: mul x0, x8, x0 ; CHECK-NEXT: ret entry: @@ -80,7 +80,7 @@ ; CHECK-NEXT: mov x0, x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB2_2: // %if.end -; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: csinc x8, x1, xzr, ge ; CHECK-NEXT: mul x0, x8, x0 ; CHECK-NEXT: ret entry: @@ -108,7 +108,7 @@ ; CHECK-NEXT: mov x0, x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB3_2: // %if.end -; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: csinc x8, x1, xzr, ge ; CHECK-NEXT: mul x0, x8, x0 ; CHECK-NEXT: ret entry: @@ -136,7 +136,7 @@ ; CHECK-NEXT: mov x0, x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB4_2: // %if.end -; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: csinc x8, x1, xzr, ge ; CHECK-NEXT: mul x0, x8, x0 ; CHECK-NEXT: ret entry: @@ -163,7 +163,7 @@ ; CHECK-NEXT: mov x0, x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB5_2: // %if.end -; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: csinc x8, x1, xzr, ge ; CHECK-NEXT: mul x0, x8, x0 ; CHECK-NEXT: ret entry: @@ -189,7 +189,7 @@ ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB6_2: // %if.end ; CHECK-NEXT: cmn x0, #1 // =1 -; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: csinc x8, x1, xzr, ge ; CHECK-NEXT: mul x0, x8, x0 ; CHECK-NEXT: ret entry: @@ -215,7 +215,7 @@ ; CHECK-NEXT: mov x0, x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB7_2: // %if.end -; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: csinc x8, x1, xzr, ge ; CHECK-NEXT: mul x0, x8, x0 ; CHECK-NEXT: ret entry: @@ -241,7 +241,7 @@ ; CHECK-NEXT: mov x0, x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB8_2: // %if.end -; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: csinc x8, x1, xzr, ge ; CHECK-NEXT: mul x0, x8, x0 ; CHECK-NEXT: ret entry: @@ -267,7 +267,7 @@ ; CHECK-NEXT: mov x0, x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB9_2: // %if.end -; CHECK-NEXT: csinc x8, x1, xzr, eq +; CHECK-NEXT: csinc x8, x1, xzr, ge ; CHECK-NEXT: mul x0, x8, x0 ; CHECK-NEXT: ret entry: diff --git 
a/llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll b/llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll --- a/llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll +++ b/llvm/test/CodeGen/PowerPC/use-cr-result-of-dom-icmp-st.ll @@ -25,7 +25,7 @@ ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB0_2: # %if.end ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mulld r3, r4, r3 ; CHECK-NEXT: blr entry: @@ -54,7 +54,7 @@ ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB1_2: # %if.end ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mulld r3, r4, r3 ; CHECK-NEXT: blr entry: @@ -81,9 +81,8 @@ ; CHECK-NEXT: mr r3, r4 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB2_2: # %if.end -; CHECK-NEXT: cmpldi r5, 0 ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mulld r3, r4, r3 ; CHECK-NEXT: blr entry: @@ -111,9 +110,8 @@ ; CHECK-NEXT: mr r3, r4 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB3_2: # %if.end -; CHECK-NEXT: cmpldi r5, 1 ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mulld r3, r4, r3 ; CHECK-NEXT: blr entry: @@ -141,9 +139,8 @@ ; CHECK-NEXT: mr r3, r4 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB4_2: # %if.end -; CHECK-NEXT: cmpldi r5, 2 ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mulld r3, r4, r3 ; CHECK-NEXT: blr entry: @@ -171,7 +168,7 @@ ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB5_2: # %if.end ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mulld r3, r4, r3 ; CHECK-NEXT: blr entry: @@ -198,7 +195,7 @@ ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB6_2: # %if.end ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mulld r3, r4, r3 ; CHECK-NEXT: blr entry: @@ -224,9 +221,8 @@ ; CHECK-NEXT: mr r3, r4 ; CHECK-NEXT: blr 
; CHECK-NEXT: .LBB7_2: # %if.end -; CHECK-NEXT: cmpldi r3, 0 ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mulld r3, r4, r3 ; CHECK-NEXT: blr entry: @@ -252,9 +248,8 @@ ; CHECK-NEXT: mr r3, r4 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB8_2: # %if.end -; CHECK-NEXT: cmpldi r3, 1 ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mulld r3, r4, r3 ; CHECK-NEXT: blr entry: @@ -280,9 +275,8 @@ ; CHECK-NEXT: mr r3, r4 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB9_2: # %if.end -; CHECK-NEXT: cmpldi r3, 2 ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mulld r3, r4, r3 ; CHECK-NEXT: blr entry: @@ -307,7 +301,7 @@ ; CHECK-NEXT: bgt cr0, .LBB10_2 ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mullw r4, r4, r3 ; CHECK-NEXT: .LBB10_2: # %return ; CHECK-NEXT: extsw r3, r4 @@ -340,7 +334,7 @@ ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB11_2: # %if.end ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mullw r4, r4, r3 ; CHECK-NEXT: extsw r3, r4 ; CHECK-NEXT: blr @@ -371,9 +365,8 @@ ; CHECK-NEXT: extsw r3, r4 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB12_2: # %if.end -; CHECK-NEXT: cmplwi r5, 0 ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mullw r4, r4, r3 ; CHECK-NEXT: extsw r3, r4 ; CHECK-NEXT: blr @@ -401,9 +394,8 @@ ; CHECK-NEXT: cmpwi r5, 1 ; CHECK-NEXT: bgt cr0, .LBB13_2 ; CHECK-NEXT: # %bb.1: # %if.end -; CHECK-NEXT: cmplwi r5, 1 ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mullw r4, r4, r3 ; CHECK-NEXT: .LBB13_2: # %return ; CHECK-NEXT: extsw r3, r4 @@ -432,9 +424,8 @@ ; CHECK-NEXT: cmpwi r5, 2 ; CHECK-NEXT: bgt cr0, .LBB14_2 ; CHECK-NEXT: # %bb.1: # %if.end -; 
CHECK-NEXT: cmplwi r5, 2 ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mullw r4, r4, r3 ; CHECK-NEXT: .LBB14_2: # %return ; CHECK-NEXT: extsw r3, r4 @@ -463,7 +454,7 @@ ; CHECK-NEXT: bgt cr0, .LBB15_2 ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mullw r4, r4, r3 ; CHECK-NEXT: .LBB15_2: # %return ; CHECK-NEXT: extsw r3, r4 @@ -494,7 +485,7 @@ ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB16_2: # %if.end ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mullw r4, r4, r3 ; CHECK-NEXT: extsw r3, r4 ; CHECK-NEXT: blr @@ -523,9 +514,8 @@ ; CHECK-NEXT: extsw r3, r4 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB17_2: # %if.end -; CHECK-NEXT: cmplwi r3, 0 ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mullw r4, r4, r3 ; CHECK-NEXT: extsw r3, r4 ; CHECK-NEXT: blr @@ -551,9 +541,8 @@ ; CHECK-NEXT: cmpwi r3, 1 ; CHECK-NEXT: bgt cr0, .LBB18_2 ; CHECK-NEXT: # %bb.1: # %if.end -; CHECK-NEXT: cmplwi r3, 1 ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mullw r4, r4, r3 ; CHECK-NEXT: .LBB18_2: # %return ; CHECK-NEXT: extsw r3, r4 @@ -580,9 +569,8 @@ ; CHECK-NEXT: cmpwi r3, 2 ; CHECK-NEXT: bgt cr0, .LBB19_2 ; CHECK-NEXT: # %bb.1: # %if.end -; CHECK-NEXT: cmplwi r3, 2 ; CHECK-NEXT: li r5, 1 -; CHECK-NEXT: isel r4, r4, r5, eq +; CHECK-NEXT: isel r4, r5, r4, lt ; CHECK-NEXT: mullw r4, r4, r3 ; CHECK-NEXT: .LBB19_2: # %return ; CHECK-NEXT: extsw r3, r4 diff --git a/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll b/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll --- a/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll +++ b/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-unknown -O3 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-unknown -O3 -cgp-icmp-eq2icmp-st -verify-machineinstrs < %s | FileCheck %s ; Test cases are generated from: ; long long NAME(PARAM a, PARAM b) { @@ -17,18 +17,17 @@ define i64 @ll_a_op_b__2(i64 %a, i64 %b) { ; CHECK-LABEL: ll_a_op_b__2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: shlq %cl, %rax -; CHECK-NEXT: cmpq $-2, %rax -; CHECK-NEXT: jle .LBB0_1 -; CHECK-NEXT: # %bb.2: # %return -; CHECK-NEXT: movq %rcx, %rax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB0_1: # %if.end -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmoveq %rcx, %rax +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: cmpq $-2, %rdx +; CHECK-NEXT: jg .LBB0_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: cmovlq %rcx, %rax ; CHECK-NEXT: imulq %rdi, %rax +; CHECK-NEXT: .LBB0_2: # %return ; CHECK-NEXT: retq entry: %shl = shl i64 %a, %b @@ -48,18 +47,18 @@ define i64 @ll_a_op_b__1(i64 %a, i64 %b) { ; CHECK-LABEL: ll_a_op_b__1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: shlq %cl, %rax -; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: js .LBB1_1 ; CHECK-NEXT: # %bb.2: # %return -; CHECK-NEXT: movq %rcx, %rax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB1_1: # %if.end -; CHECK-NEXT: cmpq $-1, %rax -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmoveq %rcx, %rax +; CHECK-NEXT: cmpq $-1, %rdx +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: cmovlq %rcx, %rax ; CHECK-NEXT: imulq %rdi, %rax ; CHECK-NEXT: retq entry: @@ -80,17 +79,17 @@ define i64 @ll_a_op_b_0(i64 %a, i64 %b) { ; CHECK-LABEL: 
ll_a_op_b_0: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: shlq %cl, %rax -; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: jle .LBB2_1 ; CHECK-NEXT: # %bb.2: # %return -; CHECK-NEXT: movq %rcx, %rax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB2_1: # %if.end -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmoveq %rcx, %rax +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: cmovsq %rcx, %rax ; CHECK-NEXT: imulq %rdi, %rax ; CHECK-NEXT: retq entry: @@ -111,18 +110,18 @@ define i64 @ll_a_op_b_1(i64 %a, i64 %b) { ; CHECK-LABEL: ll_a_op_b_1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: shlq %cl, %rax -; CHECK-NEXT: cmpq $1, %rax -; CHECK-NEXT: jle .LBB3_1 -; CHECK-NEXT: # %bb.2: # %return -; CHECK-NEXT: movq %rcx, %rax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB3_1: # %if.end -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmoveq %rcx, %rax +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: cmpq $1, %rdx +; CHECK-NEXT: jg .LBB3_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: testq %rdx, %rdx +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: cmovleq %rcx, %rax ; CHECK-NEXT: imulq %rdi, %rax +; CHECK-NEXT: .LBB3_2: # %return ; CHECK-NEXT: retq entry: %shl = shl i64 %a, %b @@ -142,18 +141,17 @@ define i64 @ll_a_op_b_2(i64 %a, i64 %b) { ; CHECK-LABEL: ll_a_op_b_2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: shlq %cl, %rax -; CHECK-NEXT: cmpq $2, %rax -; CHECK-NEXT: jle .LBB4_1 -; CHECK-NEXT: # %bb.2: # %return -; CHECK-NEXT: movq %rcx, %rax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB4_1: # %if.end -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmoveq %rcx, %rax +; CHECK-NEXT: movq %rsi, %rax +; 
CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shlq %cl, %rdx +; CHECK-NEXT: cmpq $2, %rdx +; CHECK-NEXT: jg .LBB4_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: cmovlq %rcx, %rax ; CHECK-NEXT: imulq %rdi, %rax +; CHECK-NEXT: .LBB4_2: # %return ; CHECK-NEXT: retq entry: %shl = shl i64 %a, %b @@ -173,15 +171,14 @@ define i64 @ll_a__2(i64 %a, i64 %b) { ; CHECK-LABEL: ll_a__2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cmpq $-2, %rdi -; CHECK-NEXT: jle .LBB5_1 -; CHECK-NEXT: # %bb.2: # %return ; CHECK-NEXT: movq %rsi, %rax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB5_1: # %if.end -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmoveq %rsi, %rax +; CHECK-NEXT: cmpq $-2, %rdi +; CHECK-NEXT: jg .LBB5_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: cmovlq %rcx, %rax ; CHECK-NEXT: imulq %rdi, %rax +; CHECK-NEXT: .LBB5_2: # %return ; CHECK-NEXT: retq entry: %cmp = icmp sgt i64 %a, -2 @@ -200,15 +197,15 @@ define i64 @ll_a__1(i64 %a, i64 %b) { ; CHECK-LABEL: ll_a__1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: testq %rdi, %rdi ; CHECK-NEXT: js .LBB6_1 ; CHECK-NEXT: # %bb.2: # %return -; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB6_1: # %if.end ; CHECK-NEXT: cmpq $-1, %rdi -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmoveq %rsi, %rax +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: cmovlq %rcx, %rax ; CHECK-NEXT: imulq %rdi, %rax ; CHECK-NEXT: retq entry: @@ -228,14 +225,14 @@ define i64 @ll_a_0(i64 %a, i64 %b) { ; CHECK-LABEL: ll_a_0: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: testq %rdi, %rdi ; CHECK-NEXT: jle .LBB7_1 ; CHECK-NEXT: # %bb.2: # %return -; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB7_1: # %if.end -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmoveq %rsi, %rax +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: cmovsq %rcx, %rax ; CHECK-NEXT: imulq %rdi, %rax ; CHECK-NEXT: retq entry: @@ -255,15 
+252,15 @@ define i64 @ll_a_1(i64 %a, i64 %b) { ; CHECK-LABEL: ll_a_1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cmpq $1, %rdi -; CHECK-NEXT: jle .LBB8_1 -; CHECK-NEXT: # %bb.2: # %return ; CHECK-NEXT: movq %rsi, %rax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB8_1: # %if.end -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmoveq %rsi, %rax +; CHECK-NEXT: cmpq $1, %rdi +; CHECK-NEXT: jg .LBB8_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: cmovleq %rcx, %rax ; CHECK-NEXT: imulq %rdi, %rax +; CHECK-NEXT: .LBB8_2: # %return ; CHECK-NEXT: retq entry: %cmp = icmp sgt i64 %a, 1 @@ -282,15 +279,14 @@ define i64 @ll_a_2(i64 %a, i64 %b) { ; CHECK-LABEL: ll_a_2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cmpq $2, %rdi -; CHECK-NEXT: jle .LBB9_1 -; CHECK-NEXT: # %bb.2: # %return ; CHECK-NEXT: movq %rsi, %rax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB9_1: # %if.end -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmoveq %rsi, %rax +; CHECK-NEXT: cmpq $2, %rdi +; CHECK-NEXT: jg .LBB9_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: cmovlq %rcx, %rax ; CHECK-NEXT: imulq %rdi, %rax +; CHECK-NEXT: .LBB9_2: # %return ; CHECK-NEXT: retq entry: %cmp = icmp sgt i64 %a, 2 @@ -316,9 +312,8 @@ ; CHECK-NEXT: jg .LBB10_2 ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmovel %ecx, %eax -; CHECK-NEXT: imull %edi, %eax -; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: cmovll %eax, %ecx +; CHECK-NEXT: imull %edi, %ecx ; CHECK-NEXT: .LBB10_2: # %return ; CHECK-NEXT: movslq %ecx, %rax ; CHECK-NEXT: retq @@ -353,9 +348,8 @@ ; CHECK-NEXT: .LBB11_1: # %if.end ; CHECK-NEXT: cmpl $-1, %eax ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmovel %ecx, %eax -; CHECK-NEXT: imull %edi, %eax -; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: cmovll %eax, %ecx +; CHECK-NEXT: imull %edi, %ecx ; CHECK-NEXT: movslq %ecx, %rax ; CHECK-NEXT: retq entry: @@ -388,9 +382,8 @@ ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB12_1: # 
%if.end ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmovel %ecx, %eax -; CHECK-NEXT: imull %edi, %eax -; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: cmovsl %eax, %ecx +; CHECK-NEXT: imull %edi, %ecx ; CHECK-NEXT: movslq %ecx, %rax ; CHECK-NEXT: retq entry: @@ -419,10 +412,10 @@ ; CHECK-NEXT: cmpl $1, %eax ; CHECK-NEXT: jg .LBB13_2 ; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmovel %ecx, %eax -; CHECK-NEXT: imull %edi, %eax -; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: cmovlel %eax, %ecx +; CHECK-NEXT: imull %edi, %ecx ; CHECK-NEXT: .LBB13_2: # %return ; CHECK-NEXT: movslq %ecx, %rax ; CHECK-NEXT: retq @@ -453,9 +446,8 @@ ; CHECK-NEXT: jg .LBB14_2 ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmovel %ecx, %eax -; CHECK-NEXT: imull %edi, %eax -; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: cmovll %eax, %ecx +; CHECK-NEXT: imull %edi, %ecx ; CHECK-NEXT: .LBB14_2: # %return ; CHECK-NEXT: movslq %ecx, %rax ; CHECK-NEXT: retq @@ -483,9 +475,8 @@ ; CHECK-NEXT: jg .LBB15_2 ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmovel %esi, %eax -; CHECK-NEXT: imull %edi, %eax -; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: cmovll %eax, %esi +; CHECK-NEXT: imull %edi, %esi ; CHECK-NEXT: .LBB15_2: # %return ; CHECK-NEXT: movslq %esi, %rax ; CHECK-NEXT: retq @@ -516,9 +507,8 @@ ; CHECK-NEXT: .LBB16_1: # %if.end ; CHECK-NEXT: cmpl $-1, %edi ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmovel %esi, %eax -; CHECK-NEXT: imull %edi, %eax -; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: cmovll %eax, %esi +; CHECK-NEXT: imull %edi, %esi ; CHECK-NEXT: movslq %esi, %rax ; CHECK-NEXT: retq entry: @@ -547,9 +537,8 @@ ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB17_1: # %if.end ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmovel %esi, %eax -; CHECK-NEXT: imull %edi, %eax -; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: cmovsl %eax, %esi +; CHECK-NEXT: imull %edi, %esi ; CHECK-NEXT: movslq %esi, 
%rax ; CHECK-NEXT: retq entry: @@ -574,10 +563,10 @@ ; CHECK-NEXT: cmpl $1, %edi ; CHECK-NEXT: jg .LBB18_2 ; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmovel %esi, %eax -; CHECK-NEXT: imull %edi, %eax -; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: cmovlel %eax, %esi +; CHECK-NEXT: imull %edi, %esi ; CHECK-NEXT: .LBB18_2: # %return ; CHECK-NEXT: movslq %esi, %rax ; CHECK-NEXT: retq @@ -604,9 +593,8 @@ ; CHECK-NEXT: jg .LBB19_2 ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmovel %esi, %eax -; CHECK-NEXT: imull %edi, %eax -; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: cmovll %eax, %esi +; CHECK-NEXT: imull %edi, %esi ; CHECK-NEXT: .LBB19_2: # %return ; CHECK-NEXT: movslq %esi, %rax ; CHECK-NEXT: retq