Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td @@ -2189,10 +2189,14 @@ def : Pat<(i1 (trunc (i32 GR32:$src))), (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 $src, (i32 1))), VK1)>; + def : Pat<(i1 (trunc (i32 (assertzext_i1 GR32:$src)))), + (COPY_TO_REGCLASS GR32:$src, VK1)>; + def : Pat<(i1 (trunc (i8 GR8:$src))), (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit), (i32 1))), VK1)>; + def : Pat<(i1 (trunc (i16 GR16:$src))), (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 (SUBREG_TO_REG (i32 0), $src, sub_16bit), (i32 1))), @@ -2200,32 +2204,33 @@ def : Pat<(i32 (zext VK1:$src)), (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>; + def : Pat<(i32 (anyext VK1:$src)), - (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16))>; + (COPY_TO_REGCLASS VK1:$src, GR32)>; def : Pat<(i8 (zext VK1:$src)), (EXTRACT_SUBREG (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>; + def : Pat<(i8 (anyext VK1:$src)), - (EXTRACT_SUBREG - (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_8bit)>; + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_8bit)>; def : Pat<(i64 (zext VK1:$src)), (AND64ri8 (SUBREG_TO_REG (i64 0), (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>; + def : Pat<(i64 (anyext VK1:$src)), (SUBREG_TO_REG (i64 0), - (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit)>; + (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_32bit)>; def : Pat<(i16 (zext VK1:$src)), (EXTRACT_SUBREG (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_16bit)>; + def : Pat<(i16 (anyext VK1:$src)), - (EXTRACT_SUBREG - (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), - sub_16bit)>; + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_16bit)>; } def : Pat<(v16i1 (scalar_to_vector VK1:$src)), (COPY_TO_REGCLASS VK1:$src, VK16)>; Index: 
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -1024,3 +1024,8 @@ (X86mtruncstore node:$src1, node:$src2, node:$src3), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i32; }]>; + +def assertzext_i1 : + PatFrag<(ops node:$src), (assertzext node:$src), [{ + return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i1; /* operand 1 of the assertzext node is read as a VTSDNode; the fragment matches only when that type is i1 */ +}]>; \ No newline at end of file Index: llvm/trunk/test/CodeGen/X86/avx512-i1test.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-i1test.ll +++ llvm/trunk/test/CodeGen/X86/avx512-i1test.ll @@ -66,15 +66,14 @@ define i64 @func2(i1 zeroext %i, i32 %j) { ; CHECK-LABEL: func2: ; CHECK: # BB#0: # %entry +; CHECK-NEXT: # kill: %EDI %EDI %RDI ; CHECK-NEXT: testl %esi, %esi ; CHECK-NEXT: je .LBB1_1 ; CHECK-NEXT: # BB#2: # %if.then ; CHECK-NEXT: jmp bar # TAILCALL ; CHECK-NEXT: .LBB1_1: # %return -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: orq $-2, %rax +; CHECK-NEXT: orq $-2, %rdi +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: retq entry: %tobool = icmp eq i32 %j, 0 Index: llvm/trunk/test/CodeGen/X86/fast-isel-select-cmov.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fast-isel-select-cmov.ll +++ llvm/trunk/test/CodeGen/X86/fast-isel-select-cmov.ll @@ -15,7 +15,6 @@ ; ; AVX512-LABEL: select_cmov_i16: ; AVX512: ## BB#0: -; AVX512-NEXT: andl $1, %edi ; AVX512-NEXT: kmovw %edi, %k0 ; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: cmovew %dx, %si @@ -47,7 +46,6 @@ ; ; AVX512-LABEL: select_cmov_i32: ; AVX512: ## BB#0: -; AVX512-NEXT: andl $1, %edi ; AVX512-NEXT: kmovw %edi, %k0 ; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: cmovel %edx, %esi @@ -79,7 +77,6 @@ ; ; AVX512-LABEL: select_cmov_i64: ; AVX512: ##
BB#0: -; AVX512-NEXT: andl $1, %edi ; AVX512-NEXT: kmovw %edi, %k0 ; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: cmoveq %rdx, %rsi Index: llvm/trunk/test/CodeGen/X86/pr28173.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr28173.ll +++ llvm/trunk/test/CodeGen/X86/pr28173.ll @@ -1,16 +1,20 @@ -; RUN: llc -mattr=+avx512f < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=KNL +; RUN: llc < %s -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" ; Note that the kmovs should really *not* appear in the output, this is an ; artifact of the current poor lowering. This is tracked by PR28175. -; CHECK-LABEL: @foo64 -; CHECK: kmov -; CHECK: kmov -; CHECK: orq $-2, %rax -; CHECK: ret -define i64 @foo64(i1 zeroext %i, i32 %j) #0 { +define i64 @foo64(i1 zeroext %i) #0 { +; CHECK-LABEL: foo64: +; CHECK: # BB#0: +; CHECK-NEXT: # kill: %EDI %EDI %RDI +; CHECK-NEXT: orq $-2, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: retq br label %bb bb: @@ -22,12 +26,12 @@ ret i64 %v } -; CHECK-LABEL: @foo16 -; CHECK: kmov -; CHECK: kmov -; CHECK: orl $65534, %eax -; CHECK: retq -define i16 @foo16(i1 zeroext %i, i32 %j) #0 { +define i16 @foo16(i1 zeroext %i) #0 { +; CHECK-LABEL: foo16: +; CHECK: # BB#0: +; CHECK-NEXT: orl $65534, %edi # imm = 0xFFFE +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: retq br label %bb bb: @@ -38,3 +42,68 @@ end: ret i16 %v } + +; This code is still not optimal +define i16 @foo16_1(i1 zeroext %i, i32 %j) #0 { +; KNL-LABEL: foo16_1: +; KNL: # BB#0: +; KNL-NEXT: kmovw %edi, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: orl $2, %eax +; KNL-NEXT: # kill: %AX %AX %EAX +; KNL-NEXT: retq +; +; SKX-LABEL: 
foo16_1: +; SKX: # BB#0: +; SKX-NEXT: kmovd %edi, %k0 +; SKX-NEXT: kmovw %k0, %eax +; SKX-NEXT: andl $1, %eax +; SKX-NEXT: orl $2, %eax +; SKX-NEXT: # kill: %AX %AX %EAX +; SKX-NEXT: retq + br label %bb + +bb: + %z = zext i1 %i to i16 + %v = or i16 %z, 2 + br label %end + +end: + ret i16 %v +} + +define i32 @foo32(i1 zeroext %i) #0 { +; CHECK-LABEL: foo32: +; CHECK: # BB#0: +; CHECK-NEXT: orl $-2, %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: retq + br label %bb + +bb: + %z = zext i1 %i to i32 + %v = or i32 %z, -2 + br label %end + +end: + ret i32 %v +} + +define i8 @foo8(i1 zeroext %i) #0 { +; CHECK-LABEL: foo8: +; CHECK: # BB#0: +; CHECK-NEXT: orb $-2, %dil +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: retq + br label %bb + +bb: + %z = zext i1 %i to i8 + %v = or i8 %z, -2 + br label %end + +end: + ret i8 %v +} + Index: llvm/trunk/test/CodeGen/X86/xaluo.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/xaluo.ll +++ llvm/trunk/test/CodeGen/X86/xaluo.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SDAG ; RUN: llc -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FAST ; RUN: llc -mtriple=x86_64-darwin-unknown -mcpu=knl < %s | FileCheck %s --check-prefix=KNL @@ -738,15 +739,15 @@ ; KNL-LABEL: bug27873: ; KNL: ## BB#0: ; KNL-NEXT: andl $1, %esi +; KNL-NEXT: kmovw %esi, %k0 ; KNL-NEXT: movl $160, %ecx ; KNL-NEXT: movq %rdi, %rax ; KNL-NEXT: mulq %rcx -; KNL-NEXT: kmovw %esi, %k0 ; KNL-NEXT: seto %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: # kill +; KNL-NEXT: ## kill: %AL %AL %EAX ; KNL-NEXT: retq %mul = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %c1, i64 160) %mul.overflow = extractvalue { i64, i1 } %mul, 1