Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -33916,21 +33916,6 @@ return SDValue(); } -/// Materialize "setb reg" as "sbb reg,reg", since it produces an all-ones bit -/// which is more useful than 0/1 in some cases. -static SDValue materializeSBB(SDNode *N, SDValue EFLAGS, SelectionDAG &DAG) { - SDLoc DL(N); - // "Condition code B" is also known as "the carry flag" (CF). - SDValue CF = DAG.getConstant(X86::COND_B, DL, MVT::i8); - SDValue SBB = DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, CF, EFLAGS); - MVT VT = N->getSimpleValueType(0); - if (VT == MVT::i8) - return DAG.getNode(ISD::AND, DL, VT, SBB, DAG.getConstant(1, DL, VT)); - - assert(VT == MVT::i1 && "Unexpected type for SETCC node"); - return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SBB); -} - // Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -33938,27 +33923,6 @@ X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0)); SDValue EFLAGS = N->getOperand(1); - if (CC == X86::COND_A) { - // Try to convert COND_A into COND_B in an attempt to facilitate - // materializing "setb reg". - // - // Do not flip "e > c", where "c" is a constant, because Cmp instruction - // cannot take an immediate as its first operand. - // - if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && - EFLAGS.getValueType().isInteger() && - !isa<ConstantSDNode>(EFLAGS.getOperand(1))) { - SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), - EFLAGS.getNode()->getVTList(), - EFLAGS.getOperand(1), EFLAGS.getOperand(0)); - SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); - return materializeSBB(N, NewEFLAGS, DAG); - } - } - - if (CC == X86::COND_B) - return materializeSBB(N, EFLAGS, DAG); - // Try to simplify the EFLAGS and condition code operands. 
if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) return getSETCC(CC, Flags, DL, DAG); @@ -34153,6 +34117,21 @@ return SDValue(); } +/// Materialize "setb reg" as "sbb reg,reg", since it produces an all-ones bit +/// which is more useful than 0/1 in some cases. +static SDValue materializeSBB(SDNode *N, SDValue EFLAGS, SelectionDAG &DAG) { + SDLoc DL(N); + // "Condition code B" is also known as "the carry flag" (CF). + SDValue CF = DAG.getConstant(X86::COND_B, DL, MVT::i8); + SDValue SBB = DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, CF, EFLAGS); + MVT VT = N->getSimpleValueType(0); + if (VT == MVT::i8) + return DAG.getNode(ISD::AND, DL, VT, SBB, DAG.getConstant(1, DL, VT)); + + assert(VT == MVT::i1 && "Unexpected type for SETCC node"); + return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SBB); +} + /// If this is an add or subtract where one operand is produced by a cmp+setcc, /// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB} /// with CMP+{ADC, SBB}. @@ -34180,7 +34159,42 @@ if (Y.getOpcode() != X86ISD::SETCC || !Y.hasOneUse()) return SDValue(); + SDLoc DL(N); + EVT VT = N->getValueType(0); X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0); + + if (CC == X86::COND_B) { + // X + SETB Z --> X + (mask SBB Z, Z) + // X - SETB Z --> X - (mask SBB Z, Z) + // TODO: Produce ADC/SBB here directly and avoid SETCC_CARRY? + SDValue SBB = materializeSBB(Y.getNode(), Y.getOperand(1), DAG); + if (SBB.getValueSizeInBits() != VT.getSizeInBits()) + SBB = DAG.getZExtOrTrunc(SBB, DL, VT); + return DAG.getNode(IsSub ? ISD::SUB : ISD::ADD, DL, VT, X, SBB); + } + + if (CC == X86::COND_A) { + SDValue EFLAGS = Y->getOperand(1); + // Try to convert COND_A into COND_B in an attempt to facilitate + // materializing "setb reg". + // + // Do not flip "e > c", where "c" is a constant, because Cmp instruction + // cannot take an immediate as its first operand. 
+ // + if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && + EFLAGS.getValueType().isInteger() && + !isa<ConstantSDNode>(EFLAGS.getOperand(1))) { + SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), + EFLAGS.getNode()->getVTList(), + EFLAGS.getOperand(1), EFLAGS.getOperand(0)); + SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); + SDValue SBB = materializeSBB(Y.getNode(), NewEFLAGS, DAG); + if (SBB.getValueSizeInBits() != VT.getSizeInBits()) + SBB = DAG.getZExtOrTrunc(SBB, DL, VT); + return DAG.getNode(IsSub ? ISD::SUB : ISD::ADD, DL, VT, X, SBB); + } + } + if (CC != X86::COND_E && CC != X86::COND_NE) return SDValue(); @@ -34190,9 +34204,6 @@ !Cmp.getOperand(0).getValueType().isInteger()) return SDValue(); - SDLoc DL(N); - EVT VT = N->getValueType(0); - // (cmp Z, 1) sets the carry flag if Z is 0. SDValue Z = Cmp.getOperand(0); SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, Index: llvm/trunk/test/CodeGen/X86/add-of-carry.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/add-of-carry.ll +++ llvm/trunk/test/CodeGen/X86/add-of-carry.ll @@ -1,6 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s +; These tests use adc/sbb in place of set+add/sub. Should this transform +; be enabled by micro-architecture rather than as part of generic lowering/isel? 
+ ; define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp { Index: llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll +++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll @@ -3310,16 +3310,16 @@ define i32 @test_mm_testc_pd(<2 x double> %a0, <2 x double> %a1) nounwind { ; X32-LABEL: test_mm_testc_pd: ; X32: # BB#0: +; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vtestpd %xmm1, %xmm0 -; X32-NEXT: sbbl %eax, %eax -; X32-NEXT: andl $1, %eax +; X32-NEXT: setb %al ; X32-NEXT: retl ; ; X64-LABEL: test_mm_testc_pd: ; X64: # BB#0: +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vtestpd %xmm1, %xmm0 -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: andl $1, %eax +; X64-NEXT: setb %al ; X64-NEXT: retq %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ret i32 %res @@ -3329,17 +3329,17 @@ define i32 @test_mm256_testc_pd(<4 x double> %a0, <4 x double> %a1) nounwind { ; X32-LABEL: test_mm256_testc_pd: ; X32: # BB#0: +; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vtestpd %ymm1, %ymm0 -; X32-NEXT: sbbl %eax, %eax -; X32-NEXT: andl $1, %eax +; X32-NEXT: setb %al ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_testc_pd: ; X64: # BB#0: +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vtestpd %ymm1, %ymm0 -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: andl $1, %eax +; X64-NEXT: setb %al ; X64-NEXT: vzeroupper ; X64-NEXT: retq %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) @@ -3350,16 +3350,16 @@ define i32 @test_mm_testc_ps(<4 x float> %a0, <4 x float> %a1) nounwind { ; X32-LABEL: test_mm_testc_ps: ; X32: # BB#0: +; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vtestps %xmm1, %xmm0 -; X32-NEXT: sbbl %eax, %eax -; X32-NEXT: andl $1, %eax +; X32-NEXT: setb %al ; X32-NEXT: retl ; ; X64-LABEL: test_mm_testc_ps: ; X64: # BB#0: +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vtestps %xmm1, %xmm0 -; X64-NEXT: sbbl %eax, %eax -; 
X64-NEXT: andl $1, %eax +; X64-NEXT: setb %al ; X64-NEXT: retq %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ret i32 %res @@ -3369,17 +3369,17 @@ define i32 @test_mm256_testc_ps(<8 x float> %a0, <8 x float> %a1) nounwind { ; X32-LABEL: test_mm256_testc_ps: ; X32: # BB#0: +; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vtestps %ymm1, %ymm0 -; X32-NEXT: sbbl %eax, %eax -; X32-NEXT: andl $1, %eax +; X32-NEXT: setb %al ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_testc_ps: ; X64: # BB#0: +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vtestps %ymm1, %ymm0 -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: andl $1, %eax +; X64-NEXT: setb %al ; X64-NEXT: vzeroupper ; X64-NEXT: retq %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) @@ -3390,17 +3390,17 @@ define i32 @test_mm256_testc_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind { ; X32-LABEL: test_mm256_testc_si256: ; X32: # BB#0: +; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vptest %ymm1, %ymm0 -; X32-NEXT: sbbl %eax, %eax -; X32-NEXT: andl $1, %eax +; X32-NEXT: setb %al ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_testc_si256: ; X64: # BB#0: +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vptest %ymm1, %ymm0 -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: andl $1, %eax +; X64-NEXT: setb %al ; X64-NEXT: vzeroupper ; X64-NEXT: retq %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) Index: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll +++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -497,9 +497,9 @@ define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-LABEL: test_x86_avx_ptestc_256: ; CHECK: ## BB#0: +; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] ; CHECK-NEXT: vptest %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x17,0xc1] -; CHECK-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0] -; 
CHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01] +; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0] ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; [#uses=1] @@ -746,9 +746,9 @@ define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: test_x86_avx_vtestc_pd: ; CHECK: ## BB#0: +; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] ; CHECK-NEXT: vtestpd %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x0f,0xc1] -; CHECK-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0] -; CHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01] +; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0] ; CHECK-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -759,9 +759,9 @@ define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) { ; CHECK-LABEL: test_x86_avx_vtestc_pd_256: ; CHECK: ## BB#0: +; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] ; CHECK-NEXT: vtestpd %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x0f,0xc1] -; CHECK-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0] -; CHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01] +; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0] ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; [#uses=1] @@ -773,9 +773,9 @@ define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) { ; CHECK-LABEL: test_x86_avx_vtestc_ps: ; CHECK: ## BB#0: +; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] ; CHECK-NEXT: vtestps %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x0e,0xc1] -; CHECK-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0] -; CHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01] +; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0] ; CHECK-NEXT: retl ## 
encoding: [0xc3] %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -786,9 +786,9 @@ define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) { ; CHECK-LABEL: test_x86_avx_vtestc_ps_256: ; CHECK: ## BB#0: +; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] ; CHECK-NEXT: vtestps %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x0e,0xc1] -; CHECK-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0] -; CHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01] +; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0] ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; [#uses=1] Index: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll +++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll @@ -334,7 +334,7 @@ ; KNL-LABEL: test13: ; KNL: ## BB#0: ; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: sbbb %al, %al +; KNL-NEXT: setb %al ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: kmovw %eax, %k0 ; KNL-NEXT: movw $-4, %ax @@ -348,7 +348,7 @@ ; SKX-LABEL: test13: ; SKX: ## BB#0: ; SKX-NEXT: cmpl %esi, %edi -; SKX-NEXT: sbbb %al, %al +; SKX-NEXT: setb %al ; SKX-NEXT: andl $1, %eax ; SKX-NEXT: kmovw %eax, %k0 ; SKX-NEXT: movw $-4, %ax @@ -1122,135 +1122,137 @@ ; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: andq $-32, %rsp ; KNL-NEXT: subq $32, %rsp +; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: cmpl %esi, %edi +; KNL-NEXT: setb %al ; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k0 ; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vmovd %ecx, %xmm1 -; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %edx +; 
KNL-NEXT: vmovd %edx, %xmm1 +; KNL-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $7, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $10, %ecx, 
%xmm1, %xmm1 ; KNL-NEXT: kshiftlw $4, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: vpinsrb $15, %ecx, %xmm1, %xmm1 ; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k0 ; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vmovd %ecx, %xmm0 -; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %edx +; KNL-NEXT: vmovd %edx, %xmm0 +; KNL-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftrw 
$15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $7, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $4, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: 
vpinsrb $13, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 ; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 @@ -1261,8 +1263,6 @@ ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: sbbl %eax, %eax -; KNL-NEXT: andl $1, %eax ; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 @@ -1276,7 +1276,7 @@ ; SKX-LABEL: test_insertelement_v32i1: ; SKX: ## BB#0: ; SKX-NEXT: cmpl %esi, %edi -; SKX-NEXT: sbbb %al, %al +; SKX-NEXT: setb %al ; SKX-NEXT: andl $1, %eax ; SKX-NEXT: kmovw %eax, %k0 ; SKX-NEXT: vpcmpltud %zmm2, %zmm0, %k1 @@ -1301,7 +1301,7 @@ ; KNL-LABEL: test_iinsertelement_v4i1: ; KNL: ## BB#0: ; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: sbbb %al, %al +; KNL-NEXT: setb %al ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2 @@ -1341,7 +1341,7 @@ ; SKX-LABEL: test_iinsertelement_v4i1: ; SKX: ## BB#0: ; SKX-NEXT: cmpl %esi, %edi -; SKX-NEXT: sbbb %al, %al +; SKX-NEXT: setb %al ; SKX-NEXT: andl $1, %eax ; SKX-NEXT: kmovw %eax, %k0 ; SKX-NEXT: vpcmpltud %xmm1, %xmm0, %k1 @@ -1364,7 +1364,7 @@ ; KNL-LABEL: test_iinsertelement_v2i1: ; KNL: ## BB#0: ; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: sbbb %al, %al +; KNL-NEXT: setb %al ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] @@ -1386,7 +1386,7 
@@ ; SKX-LABEL: test_iinsertelement_v2i1: ; SKX: ## BB#0: ; SKX-NEXT: cmpl %esi, %edi -; SKX-NEXT: sbbb %al, %al +; SKX-NEXT: setb %al ; SKX-NEXT: andl $1, %eax ; SKX-NEXT: kmovw %eax, %k0 ; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll @@ -21,9 +21,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k0 ; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: kortestw %k0, %k1 -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: setb %al ; CHECK-NEXT: retq %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1) ret i32 %res Index: llvm/trunk/test/CodeGen/X86/ctpop-combine.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/ctpop-combine.ll +++ llvm/trunk/test/CodeGen/X86/ctpop-combine.ll @@ -36,11 +36,11 @@ define i32 @test3(i64 %x) nounwind readnone { ; CHECK-LABEL: test3: ; CHECK: # BB#0: -; CHECK-NEXT: popcntq %rdi, %rax -; CHECK-NEXT: andb $63, %al -; CHECK-NEXT: cmpb $2, %al -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: popcntq %rdi, %rcx +; CHECK-NEXT: andb $63, %cl +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpb $2, %cl +; CHECK-NEXT: setb %al ; CHECK-NEXT: retq %count = tail call i64 @llvm.ctpop.i64(i64 %x) %cast = trunc i64 %count to i6 ; Too small for 0-64 Index: llvm/trunk/test/CodeGen/X86/fast-isel-cmp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fast-isel-cmp.ll +++ llvm/trunk/test/CodeGen/X86/fast-isel-cmp.ll @@ -301,8 +301,8 @@ define zeroext i1 @icmp_ugt(i32 %x, i32 %y) { ; SDAG-LABEL: icmp_ugt: ; SDAG: ## BB#0: -; SDAG-NEXT: cmpl %edi, %esi -; SDAG-NEXT: setb %al +; SDAG-NEXT: cmpl %esi, %edi +; SDAG-NEXT: seta %al ; SDAG-NEXT: retq ; ; FAST-LABEL: 
icmp_ugt: Index: llvm/trunk/test/CodeGen/X86/peep-setb.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/peep-setb.ll +++ llvm/trunk/test/CodeGen/X86/peep-setb.ll @@ -1,6 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s +; These tests use cmp+adc/sbb in place of test+set+add/sub. Should this transform +; be enabled by micro-architecture rather than as part of generic lowering/isel? + define i8 @test1(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: test1: ; CHECK: # BB#0: Index: llvm/trunk/test/CodeGen/X86/pr26350.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr26350.ll +++ llvm/trunk/test/CodeGen/X86/pr26350.ll @@ -15,8 +15,8 @@ ; CHECK-NEXT: andl $16, %eax ; CHECK-NEXT: cmpl $-1, %eax ; CHECK-NEXT: sbbl $0, %ecx -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: setb %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retl entry: %load = load i32, i32* @d, align 4 Index: llvm/trunk/test/CodeGen/X86/setcc.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/setcc.ll +++ llvm/trunk/test/CodeGen/X86/setcc.ll @@ -21,9 +21,10 @@ define zeroext i16 @t2(i16 zeroext %x) nounwind readnone ssp { ; CHECK-LABEL: t2: ; CHECK: ## BB#0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl $26, %edi -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: andl $32, %eax +; CHECK-NEXT: setb %al +; CHECK-NEXT: shll $5, %eax ; CHECK-NEXT: retq %t0 = icmp ult i16 %x, 26 %if = select i1 %t0, i16 32, i16 0 @@ -33,9 +34,10 @@ define i64 @t3(i64 %x) nounwind readnone ssp { ; CHECK-LABEL: t3: ; CHECK: ## BB#0: +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpq $18, %rdi -; CHECK-NEXT: sbbq %rax, %rax -; CHECK-NEXT: andl $64, %eax +; CHECK-NEXT: setb %al +; CHECK-NEXT: shlq $6, %rax ; CHECK-NEXT: retq %t0 = icmp ult i64 %x, 18 %if = 
select i1 %t0, i64 64, i64 0 Index: llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll +++ llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll @@ -898,17 +898,17 @@ ; X32-LABEL: test_mm_test_all_ones: ; X32: # BB#0: ; X32-NEXT: pcmpeqd %xmm1, %xmm1 +; X32-NEXT: xorl %eax, %eax ; X32-NEXT: ptest %xmm1, %xmm0 -; X32-NEXT: sbbl %eax, %eax -; X32-NEXT: andl $1, %eax +; X32-NEXT: setb %al ; X32-NEXT: retl ; ; X64-LABEL: test_mm_test_all_ones: ; X64: # BB#0: ; X64-NEXT: pcmpeqd %xmm1, %xmm1 +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ptest %xmm1, %xmm0 -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: andl $1, %eax +; X64-NEXT: setb %al ; X64-NEXT: retq %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> <i64 -1, i64 -1>) ret i32 %res @@ -956,16 +956,16 @@ define i32 @test_mm_testc_si128(<2 x i64> %a0, <2 x i64> %a1) { ; X32-LABEL: test_mm_testc_si128: ; X32: # BB#0: +; X32-NEXT: xorl %eax, %eax ; X32-NEXT: ptest %xmm1, %xmm0 -; X32-NEXT: sbbl %eax, %eax -; X32-NEXT: andl $1, %eax +; X32-NEXT: setb %al ; X32-NEXT: retl ; ; X64-LABEL: test_mm_testc_si128: ; X64: # BB#0: +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ptest %xmm1, %xmm0 -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: andl $1, %eax +; X64-NEXT: setb %al ; X64-NEXT: retq %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ret i32 %res Index: llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll +++ llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll @@ -362,16 +362,16 @@ define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) { ; SSE41-LABEL: test_x86_sse41_ptestc: ; SSE41: ## BB#0: +; SSE41-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] ; SSE41-NEXT: ptest %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x17,0xc1] -; SSE41-NEXT: sbbl %eax, %eax ## 
encoding: [0x19,0xc0] -; SSE41-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01] +; SSE41-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0] ; SSE41-NEXT: retl ## encoding: [0xc3] ; ; VCHECK-LABEL: test_x86_sse41_ptestc: ; VCHECK: ## BB#0: +; VCHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] ; VCHECK-NEXT: vptest %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x17,0xc1] -; VCHECK-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0] -; VCHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01] +; VCHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0] ; VCHECK-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; [#uses=1] ret i32 %res Index: llvm/trunk/test/CodeGen/X86/sse41.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse41.ll +++ llvm/trunk/test/CodeGen/X86/sse41.ll @@ -228,16 +228,16 @@ define i32 @ptestz_2(<2 x i64> %t1, <2 x i64> %t2) nounwind { ; X32-LABEL: ptestz_2: ; X32: ## BB#0: +; X32-NEXT: xorl %eax, %eax ; X32-NEXT: ptest %xmm1, %xmm0 -; X32-NEXT: sbbl %eax, %eax -; X32-NEXT: andl $1, %eax +; X32-NEXT: setb %al ; X32-NEXT: retl ; ; X64-LABEL: ptestz_2: ; X64: ## BB#0: +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ptest %xmm1, %xmm0 -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: andl $1, %eax +; X64-NEXT: setb %al ; X64-NEXT: retq %tmp1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone ret i32 %tmp1 Index: llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll +++ llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll @@ -33,23 +33,27 @@ } declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone -define i32 @test_mm_cmpestrc(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) { +define i32 @test_mm_cmpestrc(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) nounwind { ; X32-LABEL: 
test_mm_cmpestrc: ; X32: # BB#0: +; X32-NEXT: pushl %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X32-NEXT: sbbl %eax, %eax -; X32-NEXT: andl $1, %eax +; X32-NEXT: setb %bl +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: popl %ebx ; X32-NEXT: retl ; ; X64-LABEL: test_mm_cmpestrc: ; X64: # BB#0: +; X64-NEXT: xorl %r8d, %r8d ; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: andl $1, %eax +; X64-NEXT: setb %r8b +; X64-NEXT: movl %r8d, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -229,16 +233,16 @@ define i32 @test_mm_cmpistrc(<2 x i64> %a0, <2 x i64> %a1) { ; X32-LABEL: test_mm_cmpistrc: ; X32: # BB#0: +; X32-NEXT: xorl %eax, %eax ; X32-NEXT: pcmpistri $7, %xmm1, %xmm0 -; X32-NEXT: sbbl %eax, %eax -; X32-NEXT: andl $1, %eax +; X32-NEXT: setb %al ; X32-NEXT: retl ; ; X64-LABEL: test_mm_cmpistrc: ; X64: # BB#0: +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: pcmpistri $7, %xmm1, %xmm0 -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: andl $1, %eax +; X64-NEXT: setb %al ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> Index: llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll +++ llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll @@ -95,23 +95,29 @@ declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone -define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) { +define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) nounwind { ; SSE42-LABEL: test_x86_sse42_pcmpestric128: ; SSE42: ## BB#0: +; SSE42-NEXT: pushl %ebx ## encoding: [0x53] ; SSE42-NEXT: movl $7, %eax ## encoding: 
[0xb8,0x07,0x00,0x00,0x00] ; SSE42-NEXT: movl $7, %edx ## encoding: [0xba,0x07,0x00,0x00,0x00] +; SSE42-NEXT: xorl %ebx, %ebx ## encoding: [0x31,0xdb] ; SSE42-NEXT: pcmpestri $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x61,0xc1,0x07] -; SSE42-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0] -; SSE42-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01] +; SSE42-NEXT: setb %bl ## encoding: [0x0f,0x92,0xc3] +; SSE42-NEXT: movl %ebx, %eax ## encoding: [0x89,0xd8] +; SSE42-NEXT: popl %ebx ## encoding: [0x5b] ; SSE42-NEXT: retl ## encoding: [0xc3] ; ; VCHECK-LABEL: test_x86_sse42_pcmpestric128: ; VCHECK: ## BB#0: +; VCHECK-NEXT: pushl %ebx ## encoding: [0x53] ; VCHECK-NEXT: movl $7, %eax ## encoding: [0xb8,0x07,0x00,0x00,0x00] ; VCHECK-NEXT: movl $7, %edx ## encoding: [0xba,0x07,0x00,0x00,0x00] +; VCHECK-NEXT: xorl %ebx, %ebx ## encoding: [0x31,0xdb] ; VCHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x61,0xc1,0x07] -; VCHECK-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0] -; VCHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01] +; VCHECK-NEXT: setb %bl ## encoding: [0x0f,0x92,0xc3] +; VCHECK-NEXT: movl %ebx, %eax ## encoding: [0x89,0xd8] +; VCHECK-NEXT: popl %ebx ## encoding: [0x5b] ; VCHECK-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; [#uses=1] ret i32 %res @@ -326,16 +332,16 @@ define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) { ; SSE42-LABEL: test_x86_sse42_pcmpistric128: ; SSE42: ## BB#0: +; SSE42-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] ; SSE42-NEXT: pcmpistri $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x63,0xc1,0x07] -; SSE42-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0] -; SSE42-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01] +; SSE42-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0] ; SSE42-NEXT: retl ## encoding: [0xc3] ; ; VCHECK-LABEL: test_x86_sse42_pcmpistric128: ; VCHECK: ## BB#0: +; VCHECK-NEXT: xorl %eax, %eax ## encoding: 
[0x31,0xc0] ; VCHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x63,0xc1,0x07] -; VCHECK-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0] -; VCHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01] +; VCHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0] ; VCHECK-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; [#uses=1] ret i32 %res