Index: ../lib/Target/X86/X86InstrAVX512.td
===================================================================
--- ../lib/Target/X86/X86InstrAVX512.td
+++ ../lib/Target/X86/X86InstrAVX512.td
@@ -2189,10 +2189,14 @@
   def : Pat<(i1 (trunc (i32 GR32:$src))),
             (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 $src, (i32 1))), VK1)>;
 
+  def : Pat<(i1 (trunc (i32 (assertzext_i1 GR32:$src)))),
+            (COPY_TO_REGCLASS GR32:$src, VK1)>;
+
   def : Pat<(i1 (trunc (i8 GR8:$src))),
        (COPY_TO_REGCLASS
         (KMOVWkr (AND32ri8 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit), (i32 1))),
        VK1)>;
+
   def : Pat<(i1 (trunc (i16 GR16:$src))),
        (COPY_TO_REGCLASS
         (KMOVWkr (AND32ri8 (SUBREG_TO_REG (i32 0), $src, sub_16bit), (i32 1))),
@@ -2200,32 +2204,33 @@
   def : Pat<(i32 (zext VK1:$src)),
             (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
+
   def : Pat<(i32 (anyext VK1:$src)),
-            (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16))>;
+            (COPY_TO_REGCLASS VK1:$src, GR32)>;
 
   def : Pat<(i8 (zext VK1:$src)),
             (EXTRACT_SUBREG
              (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
              sub_8bit)>;
+
   def : Pat<(i8 (anyext VK1:$src)),
-            (EXTRACT_SUBREG
-             (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_8bit)>;
+            (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_8bit)>;
 
   def : Pat<(i64 (zext VK1:$src)),
             (AND64ri8 (SUBREG_TO_REG (i64 0),
                        (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit),
                       (i64 1))>;
+
   def : Pat<(i64 (anyext VK1:$src)),
             (SUBREG_TO_REG (i64 0),
-                           (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit)>;
+                           (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_32bit)>;
 
   def : Pat<(i16 (zext VK1:$src)),
             (EXTRACT_SUBREG
              (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
              sub_16bit)>;
+
   def : Pat<(i16 (anyext VK1:$src)),
-            (EXTRACT_SUBREG
-             (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
-             sub_16bit)>;
+            (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_16bit)>;
 }
 def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
           (COPY_TO_REGCLASS VK1:$src, VK16)>;
Index: ../lib/Target/X86/X86InstrFragmentsSIMD.td
===================================================================
--- ../lib/Target/X86/X86InstrFragmentsSIMD.td
+++ ../lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -1024,3 +1024,8 @@
           (X86mtruncstore node:$src1, node:$src2, node:$src3), [{
   return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
 }]>;
+
+def assertzext_i1 :
+  PatFrag<(ops node:$src), (assertzext node:$src), [{
+  return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i1;
+}]>;
\ No newline at end of file
Index: ../test/CodeGen/X86/avx512-fsel.ll
===================================================================
--- ../test/CodeGen/X86/avx512-fsel.ll
+++ ../test/CodeGen/X86/avx512-fsel.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 -mattr=+avx512f < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+define i32 @test(float %a, float %b) {
+; CHECK-LABEL: test:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:  Ltmp0:
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movb $1, %al
+; CHECK-NEXT:    vucomiss %xmm1, %xmm0
+; CHECK-NEXT:    setnp %cl
+; CHECK-NEXT:    sete %dl
+; CHECK-NEXT:    setp %sil
+; CHECK-NEXT:    setne %dil
+; CHECK-NEXT:    andb %cl, %dl
+; CHECK-NEXT:    movb %dl, %r8b
+; CHECK-NEXT:    andl $1, %r8d
+; CHECK-NEXT:    kmovw %r8d, %k0
+; CHECK-NEXT:    orb %sil, %dil
+; CHECK-NEXT:    movb %dil, %r8b
+; CHECK-NEXT:    andl $1, %r8d
+; CHECK-NEXT:    kmovw %r8d, %k1
+; CHECK-NEXT:    kortestw %k1, %k1
+; CHECK-NEXT:    movb %al, {{[0-9]+}}(%rsp) ## 1-byte Spill
+; CHECK-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill
+; CHECK-NEXT:    jne LBB0_1
+; CHECK-NEXT:    jmp LBB0_2
+; CHECK-NEXT:  LBB0_1: ## %L_0
+; CHECK-NEXT:    callq ___assert_rtn
+; CHECK-NEXT:  LBB0_2: ## %L_1
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    retq
+  %x10 = fcmp oeq float %a, %b
+  %x11 = xor i1 %x10, true
+  br i1 %x11, label %L_0, label %L_1
+
+L_0:                                              ; preds = %2
+  call void @__assert_rtn()
+  unreachable
+                                                  ; No predecessors!
+L_1:                                              ; preds = %2
+  ret i32 0
+}
+
+; Function Attrs: noreturn
+declare void @__assert_rtn()
+
Index: ../test/CodeGen/X86/avx512-i1test.ll
===================================================================
--- ../test/CodeGen/X86/avx512-i1test.ll
+++ ../test/CodeGen/X86/avx512-i1test.ll
@@ -66,15 +66,14 @@
 define i64 @func2(i1 zeroext %i, i32 %j) {
 ; CHECK-LABEL: func2:
 ; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; CHECK-NEXT:    testl %esi, %esi
 ; CHECK-NEXT:    je .LBB1_1
 ; CHECK-NEXT:  # BB#2: # %if.then
 ; CHECK-NEXT:    jmp bar # TAILCALL
 ; CHECK-NEXT:  .LBB1_1: # %return
-; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    kmovw %edi, %k0
-; CHECK-NEXT:    kmovw %k0, %eax
-; CHECK-NEXT:    orq $-2, %rax
+; CHECK-NEXT:    orq $-2, %rdi
+; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    retq
 entry:
   %tobool = icmp eq i32 %j, 0
Index: ../test/CodeGen/X86/fast-isel-select-cmov.ll
===================================================================
--- ../test/CodeGen/X86/fast-isel-select-cmov.ll
+++ ../test/CodeGen/X86/fast-isel-select-cmov.ll
@@ -15,7 +15,6 @@
 ;
 ; AVX512-LABEL: select_cmov_i16:
 ; AVX512:       ## BB#0:
-; AVX512-NEXT:    andl $1, %edi
 ; AVX512-NEXT:    kmovw %edi, %k0
 ; AVX512-NEXT:    kortestw %k0, %k0
 ; AVX512-NEXT:    cmovew %dx, %si
@@ -47,7 +46,6 @@
 ;
 ; AVX512-LABEL: select_cmov_i32:
 ; AVX512:       ## BB#0:
-; AVX512-NEXT:    andl $1, %edi
 ; AVX512-NEXT:    kmovw %edi, %k0
 ; AVX512-NEXT:    kortestw %k0, %k0
 ; AVX512-NEXT:    cmovel %edx, %esi
@@ -79,7 +77,6 @@
 ;
 ; AVX512-LABEL: select_cmov_i64:
 ; AVX512:       ## BB#0:
-; AVX512-NEXT:    andl $1, %edi
 ; AVX512-NEXT:    kmovw %edi, %k0
 ; AVX512-NEXT:    kortestw %k0, %k0
 ; AVX512-NEXT:    cmoveq %rdx, %rsi
Index: ../test/CodeGen/X86/masked_gather_scatter.ll
===================================================================
--- ../test/CodeGen/X86/masked_gather_scatter.ll
+++ ../test/CodeGen/X86/masked_gather_scatter.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=KNL_64
-; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=KNL_32
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=SKX
-; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=SKX_32
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL_64
+; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL_32
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX
+; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX_32
 ; RUN: opt -mtriple=x86_64-apple-darwin -codegenprepare -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=SCALAR
 ; RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu -mcpu=skx < %s -o /dev/null
 
@@ -1547,185 +1547,8 @@
 ; Check non-power-of-2 case. It should be scalarized.
 declare <3 x i32> @llvm.masked.gather.v3i32(<3 x i32*>, i32, <3 x i1>, <3 x i32>)
 define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) {
-; KNL_64-LABEL: test30:
-; KNL_64:       # BB#0:
-; KNL_64-NEXT:    andl $1, %edx
-; KNL_64-NEXT:    kmovw %edx, %k1
-; KNL_64-NEXT:    andl $1, %esi
-; KNL_64-NEXT:    kmovw %esi, %k2
-; KNL_64-NEXT:    movl %edi, %eax
-; KNL_64-NEXT:    andl $1, %eax
-; KNL_64-NEXT:    kmovw %eax, %k0
-; KNL_64-NEXT:    vpmovsxdq %xmm1, %ymm1
-; KNL_64-NEXT:    vpsllq $2, %ymm1, %ymm1
-; KNL_64-NEXT:    vpaddq %ymm1, %ymm0, %ymm1
-; KNL_64-NEXT:    # implicit-def: %XMM0
-; KNL_64-NEXT:    testb $1, %dil
-; KNL_64-NEXT:    je .LBB29_2
-; KNL_64-NEXT:  # BB#1: # %cond.load
-; KNL_64-NEXT:    vmovq %xmm1, %rax
-; KNL_64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; KNL_64-NEXT:  .LBB29_2: # %else
-; KNL_64-NEXT:    kmovw %k2, %eax
-; KNL_64-NEXT:    movl %eax, %ecx
-; KNL_64-NEXT:    andl $1, %ecx
-; KNL_64-NEXT:    testb %cl, %cl
-; KNL_64-NEXT:    je .LBB29_4
-; KNL_64-NEXT:  # BB#3: # %cond.load1
-; KNL_64-NEXT:    vpextrq $1, %xmm1, %rcx
-; KNL_64-NEXT:    vpinsrd $1, (%rcx), %xmm0, %xmm0
-; KNL_64-NEXT:  .LBB29_4: # %else2
-; KNL_64-NEXT:    kmovw %k1, %ecx
-; KNL_64-NEXT:    movl %ecx, %edx
-; KNL_64-NEXT:    andl $1, %edx
-; KNL_64-NEXT:    testb %dl, %dl
-; KNL_64-NEXT:    je .LBB29_6
-; KNL_64-NEXT:  # BB#5: # %cond.load4
-; KNL_64-NEXT:    vextracti128 $1, %ymm1, %xmm1
-; KNL_64-NEXT:    vmovq %xmm1, %rdx
-; KNL_64-NEXT:    vpinsrd $2, (%rdx), %xmm0, %xmm0
-; KNL_64-NEXT:  .LBB29_6: # %else5
-; KNL_64-NEXT:    kmovw %k0, %edx
-; KNL_64-NEXT:    vmovd %edx, %xmm1
-; KNL_64-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
-; KNL_64-NEXT:    vpinsrd $2, %ecx, %xmm1, %xmm1
-; KNL_64-NEXT:    vpslld $31, %xmm1, %xmm1
-; KNL_64-NEXT:    vblendvps %xmm1, %xmm0, %xmm2, %xmm0
-; KNL_64-NEXT:    retq
-;
-; KNL_32-LABEL: test30:
-; KNL_32:       # BB#0:
-; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT:    andl $1, %eax
-; KNL_32-NEXT:    kmovw %eax, %k1
-; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT:    andl $1, %eax
-; KNL_32-NEXT:    kmovw %eax, %k2
-; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT:    movl %eax, %ecx
-; KNL_32-NEXT:    andl $1, %ecx
-; KNL_32-NEXT:    kmovw %ecx, %k0
-; KNL_32-NEXT:    vpslld $2, %xmm1, %xmm1
-; KNL_32-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
-; KNL_32-NEXT:    # implicit-def: %XMM0
-; KNL_32-NEXT:    testb $1, %al
-; KNL_32-NEXT:    je .LBB29_2
-; KNL_32-NEXT:  # BB#1: # %cond.load
-; KNL_32-NEXT:    vmovd %xmm1, %eax
-; KNL_32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; KNL_32-NEXT:  .LBB29_2: # %else
-; KNL_32-NEXT:    kmovw %k2, %eax
-; KNL_32-NEXT:    movl %eax, %ecx
-; KNL_32-NEXT:    andl $1, %ecx
-; KNL_32-NEXT:    testb %cl, %cl
-; KNL_32-NEXT:    je .LBB29_4
-; KNL_32-NEXT:  # BB#3: # %cond.load1
-; KNL_32-NEXT:    vpextrd $1, %xmm1, %ecx
-; KNL_32-NEXT:    vpinsrd $1, (%ecx), %xmm0, %xmm0
-; KNL_32-NEXT:  .LBB29_4: # %else2
-; KNL_32-NEXT:    kmovw %k1, %ecx
-; KNL_32-NEXT:    movl %ecx, %edx
-; KNL_32-NEXT:    andl $1, %edx
-; KNL_32-NEXT:    testb %dl, %dl
-; KNL_32-NEXT:    je .LBB29_6
-; KNL_32-NEXT:  # BB#5: # %cond.load4
-; KNL_32-NEXT:    vpextrd $2, %xmm1, %edx
-; KNL_32-NEXT:    vpinsrd $2, (%edx), %xmm0, %xmm0
-; KNL_32-NEXT:  .LBB29_6: # %else5
-; KNL_32-NEXT:    kmovw %k0, %edx
-; KNL_32-NEXT:    vmovd %edx, %xmm1
-; KNL_32-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
-; KNL_32-NEXT:    vpinsrd $2, %ecx, %xmm1, %xmm1
-; KNL_32-NEXT:    vpslld $31, %xmm1, %xmm1
-; KNL_32-NEXT:    vblendvps %xmm1, %xmm0, %xmm2, %xmm0
-; KNL_32-NEXT:    retl
-;
-; SKX-LABEL: test30:
-; SKX:       # BB#0:
-; SKX-NEXT:    vpslld $31, %xmm2, %xmm2
-; SKX-NEXT:    vptestmd %xmm2, %xmm2, %k1
-; SKX-NEXT:    kshiftlw $15, %k1, %k0
-; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    vpmovsxdq %xmm1, %ymm1
-; SKX-NEXT:    vpsllq $2, %ymm1, %ymm1
-; SKX-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
-; SKX-NEXT:    kmovw %k0, %eax
-; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    # implicit-def: %XMM1
-; SKX-NEXT:    testb %al, %al
-; SKX-NEXT:    je .LBB29_2
-; SKX-NEXT:  # BB#1: # %cond.load
-; SKX-NEXT:    vmovq %xmm0, %rax
-; SKX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SKX-NEXT:  .LBB29_2: # %else
-; SKX-NEXT:    kshiftlw $14, %k1, %k0
-; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
-; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    testb %al, %al
-; SKX-NEXT:    je .LBB29_4
-; SKX-NEXT:  # BB#3: # %cond.load1
-; SKX-NEXT:    vpextrq $1, %xmm0, %rax
-; SKX-NEXT:    vpinsrd $1, (%rax), %xmm1, %xmm1
-; SKX-NEXT:  .LBB29_4: # %else2
-; SKX-NEXT:    kshiftlw $13, %k1, %k0
-; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
-; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    testb %al, %al
-; SKX-NEXT:    je .LBB29_6
-; SKX-NEXT:  # BB#5: # %cond.load4
-; SKX-NEXT:    vextracti64x2 $1, %ymm0, %xmm0
-; SKX-NEXT:    vmovq %xmm0, %rax
-; SKX-NEXT:    vpinsrd $2, (%rax), %xmm1, %xmm1
-; SKX-NEXT:  .LBB29_6: # %else5
-; SKX-NEXT:    vpblendmd %xmm1, %xmm3, %xmm0 {%k1}
-; SKX-NEXT:    retq
-;
-; SKX_32-LABEL: test30:
-; SKX_32:       # BB#0:
-; SKX_32-NEXT:    subl $12, %esp
-; SKX_32-NEXT:  .Ltmp0:
-; SKX_32-NEXT:    .cfi_def_cfa_offset 16
-; SKX_32-NEXT:    vpslld $31, %xmm2, %xmm2
-; SKX_32-NEXT:    vptestmd %xmm2, %xmm2, %k1
-; SKX_32-NEXT:    kshiftlw $15, %k1, %k0
-; SKX_32-NEXT:    kshiftrw $15, %k0, %k0
-; SKX_32-NEXT:    vpslld $2, %xmm1, %xmm1
-; SKX_32-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; SKX_32-NEXT:    kmovw %k0, %eax
-; SKX_32-NEXT:    andl $1, %eax
-; SKX_32-NEXT:    # implicit-def: %XMM1
-; SKX_32-NEXT:    testb %al, %al
-; SKX_32-NEXT:    je .LBB29_2
-; SKX_32-NEXT:  # BB#1: # %cond.load
-; SKX_32-NEXT:    vmovd %xmm0, %eax
-; SKX_32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SKX_32-NEXT:  .LBB29_2: # %else
-; SKX_32-NEXT:    kshiftlw $14, %k1, %k0
-; SKX_32-NEXT:    kshiftrw $15, %k0, %k0
-; SKX_32-NEXT:    kmovw %k0, %eax
-; SKX_32-NEXT:    andl $1, %eax
-; SKX_32-NEXT:    testb %al, %al
-; SKX_32-NEXT:    je .LBB29_4
-; SKX_32-NEXT:  # BB#3: # %cond.load1
-; SKX_32-NEXT:    vpextrd $1, %xmm0, %eax
-; SKX_32-NEXT:    vpinsrd $1, (%eax), %xmm1, %xmm1
-; SKX_32-NEXT:  .LBB29_4: # %else2
-; SKX_32-NEXT:    vmovdqa32 {{[0-9]+}}(%esp), %xmm2
-; SKX_32-NEXT:    kshiftlw $13, %k1, %k0
-; SKX_32-NEXT:    kshiftrw $15, %k0, %k0
-; SKX_32-NEXT:    kmovw %k0, %eax
-; SKX_32-NEXT:    andl $1, %eax
-; SKX_32-NEXT:    testb %al, %al
-; SKX_32-NEXT:    je .LBB29_6
-; SKX_32-NEXT:  # BB#5: # %cond.load4
-; SKX_32-NEXT:    vpextrd $2, %xmm0, %eax
-; SKX_32-NEXT:    vpinsrd $2, (%eax), %xmm1, %xmm1
-; SKX_32-NEXT:  .LBB29_6: # %else5
-; SKX_32-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
-; SKX_32-NEXT:    addl $12, %esp
-; SKX_32-NEXT:    retl
+; ALL-LABEL: test30:
+; ALL-NOT: gather
 
   %sext_ind = sext <3 x i32> %ind to <3 x i64>
   %gep.random = getelementptr i32, <3 x i32*> %base, <3 x i64> %sext_ind
Index: ../test/CodeGen/X86/pr28173.ll
===================================================================
--- ../test/CodeGen/X86/pr28173.ll
+++ ../test/CodeGen/X86/pr28173.ll
@@ -1,16 +1,20 @@
-; RUN: llc -mattr=+avx512f < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
+; RUN: llc < %s -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 ; Note that the kmovs should really *not* appear in the output, this is an
 ; artifact of the current poor lowering. This is tracked by PR28175.
 
-; CHECK-LABEL: @foo64
-; CHECK: kmov
-; CHECK: kmov
-; CHECK: orq $-2, %rax
-; CHECK: ret
-define i64 @foo64(i1 zeroext %i, i32 %j) #0 {
+define i64 @foo64(i1 zeroext %i) #0 {
+; CHECK-LABEL: foo64:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; CHECK-NEXT:    orq $-2, %rdi
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    retq
   br label %bb
 
 bb:
@@ -22,12 +26,12 @@
   ret i64 %v
 }
 
-; CHECK-LABEL: @foo16
-; CHECK: kmov
-; CHECK: kmov
-; CHECK: orl $65534, %eax
-; CHECK: retq
-define i16 @foo16(i1 zeroext %i, i32 %j) #0 {
+define i16 @foo16(i1 zeroext %i) #0 {
+; CHECK-LABEL: foo16:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    orl $65534, %edi # imm = 0xFFFE
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    retq
   br label %bb
 
 bb:
   %z = zext i1 %i to i16
@@ -38,3 +42,68 @@
 end:
   ret i16 %v
 }
+
+; This code is still not optimal
+define i16 @foo16_1(i1 zeroext %i, i32 %j) #0 {
+; KNL-LABEL: foo16_1:
+; KNL:       # BB#0:
+; KNL-NEXT:    kmovw %edi, %k0
+; KNL-NEXT:    kmovw %k0, %eax
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    orl $2, %eax
+; KNL-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: foo16_1:
+; SKX:       # BB#0:
+; SKX-NEXT:    kmovd %edi, %k0
+; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    andl $1, %eax
+; SKX-NEXT:    orl $2, %eax
+; SKX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; SKX-NEXT:    retq
+  br label %bb
+
+bb:
+  %z = zext i1 %i to i16
+  %v = or i16 %z, 2
+  br label %end
+
+end:
+  ret i16 %v
+}
+
+define i32 @foo32(i1 zeroext %i) #0 {
+; CHECK-LABEL: foo32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    orl $-2, %edi
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    retq
+  br label %bb
+
+bb:
+  %z = zext i1 %i to i32
+  %v = or i32 %z, -2
+  br label %end
+
+end:
+  ret i32 %v
+}
+
+define i8 @foo8(i1 zeroext %i) #0 {
+; CHECK-LABEL: foo8:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    orb $-2, %dil
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    retq
+  br label %bb
+
+bb:
+  %z = zext i1 %i to i8
+  %v = or i8 %z, -2
+  br label %end
+
+end:
+  ret i8 %v
+}
+
Index: ../test/CodeGen/X86/xaluo.ll
===================================================================
--- ../test/CodeGen/X86/xaluo.ll
+++ ../test/CodeGen/X86/xaluo.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SDAG
 ; RUN: llc -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
 ; RUN: llc -mtriple=x86_64-darwin-unknown -mcpu=knl < %s | FileCheck %s --check-prefix=KNL
@@ -738,15 +739,15 @@
 ; KNL-LABEL: bug27873:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    andl $1, %esi
+; KNL-NEXT:    kmovw %esi, %k0
 ; KNL-NEXT:    movl $160, %ecx
 ; KNL-NEXT:    movq %rdi, %rax
 ; KNL-NEXT:    mulq %rcx
-; KNL-NEXT:    kmovw %esi, %k0
 ; KNL-NEXT:    seto %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    korw %k1, %k0, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    # kill
+; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; KNL-NEXT:    retq
   %mul = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %c1, i64 160)
   %mul.overflow = extractvalue { i64, i1 } %mul, 1