Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -7627,6 +7627,14 @@
     return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
   case X86::AVX2_SETALLONES:
     return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
+  case X86::AVX1_SETALLONES: {
+    unsigned Reg = MIB->getOperand(0).getReg();
+    // VCMPPSYrri with an immediate 0xf (the TRUE predicate) produces
+    // VCMPTRUEPS, which sets all result bits regardless of its inputs.
+    MIB->setDesc(get(X86::VCMPPSYrri));
+    MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf);
+    return true;
+  }
   case X86::AVX512_512_SETALLONES: {
     unsigned Reg = MIB->getOperand(0).getReg();
     MIB->setDesc(get(X86::VPTERNLOGDZrri));
@@ -8515,6 +8523,7 @@
     Alignment = 64;
     break;
   case X86::AVX2_SETALLONES:
+  case X86::AVX1_SETALLONES:
   case X86::AVX_SET0:
   case X86::AVX512_256_SET0:
     Alignment = 32;
@@ -8560,6 +8569,7 @@
   case X86::V_SET0:
   case X86::V_SETALLONES:
   case X86::AVX2_SETALLONES:
+  case X86::AVX1_SETALLONES:
   case X86::AVX_SET0:
   case X86::AVX512_128_SET0:
   case X86::AVX512_256_SET0:
@@ -8601,13 +8611,14 @@
     else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
       Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()),16);
     else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 ||
-             Opc == X86::AVX512_256_SET0)
+             Opc == X86::AVX512_256_SET0 || Opc == X86::AVX1_SETALLONES)
       Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8);
     else
       Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);

     bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES ||
-                      Opc == X86::AVX512_512_SETALLONES);
+                      Opc == X86::AVX512_512_SETALLONES ||
+                      Opc == X86::AVX1_SETALLONES);
     const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
                                     Constant::getNullValue(Ty);
     unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -486,6 +486,10 @@
     isPseudo = 1, SchedRW = [WriteZero] in {
   def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                        [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+  let Predicates = [HasAVX1Only, OptForMinSize] in {
+  def AVX1_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
+                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
+  }
   let Predicates = [HasAVX2] in
   def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                           [(set VR256:$dst, (v8i32 immAllOnesV))]>;
@@ -7755,14 +7759,12 @@
            []>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L;
 }
-
-// Without AVX2 we need to concat two v4i32 V_SETALLONES to create a 256-bit
-// all ones value.
-let Predicates = [HasAVX1Only] in
-def : Pat<(v8i32 immAllOnesV),
-          (VINSERTF128rr
-           (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), (V_SETALLONES), sub_xmm),
-           (V_SETALLONES), 1)>;
+// To create a 256-bit all-ones value, produce VCMPTRUEPS with a zeroed YMM
+// register; the TRUE predicate sets every result bit regardless of the inputs.
+// FIXME: Avoid producing vxorps to clear the fake inputs.
+let Predicates = [HasAVX1Only] in {
+def : Pat<(v8i32 immAllOnesV), (VCMPPSYrri (AVX_SET0), (AVX_SET0), 0xf)>;
+}

 multiclass vinsert_lowering {
Index: test/CodeGen/X86/all-ones-vector.ll
===================================================================
--- test/CodeGen/X86/all-ones-vector.ll
+++ test/CodeGen/X86/all-ones-vector.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE
 ; RUN: llc < %s -mtriple=i386-unknown -mattr=+avx | FileCheck %s --check-prefix=X32-AVX --check-prefix=X32-AVX1
 ; RUN: llc < %s -mtriple=i386-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32-AVX --check-prefix=X32-AVX256 --check-prefix=X32-AVX2
@@ -30,6 +30,7 @@
 ; X64-AVX: # BB#0:
 ; X64-AVX-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX-NEXT: retq
+;
   ret <16 x i8>
 }
@@ -53,6 +54,7 @@
 ; X64-AVX: # BB#0:
 ; X64-AVX-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX-NEXT: retq
+;
   ret <8 x i16>
 }
@@ -76,6 +78,7 @@
 ; X64-AVX: # BB#0:
 ; X64-AVX-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX-NEXT: retq
+;
   ret <4 x i32>
 }
@@ -99,6 +102,7 @@
 ; X64-AVX: # BB#0:
 ; X64-AVX-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX-NEXT: retq
+;
   ret <2 x i64>
 }
@@ -122,6 +126,7 @@
 ; X64-AVX: # BB#0:
 ; X64-AVX-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX-NEXT: retq
+;
   ret <2 x double>
 }
@@ -145,6 +150,7 @@
 ; X64-AVX: # BB#0:
 ; X64-AVX-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX-NEXT: retq
+;
   ret <4 x float>
 }
@@ -157,8 +163,8 @@
 ;
 ; X32-AVX1-LABEL: allones_v32i8:
 ; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT: retl
 ;
 ; X32-AVX256-LABEL: allones_v32i8:
@@ -174,14 +180,15 @@
 ;
 ; X64-AVX1-LABEL: allones_v32i8:
 ; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT: retq
 ;
 ; X64-AVX256-LABEL: allones_v32i8:
 ; X64-AVX256: # BB#0:
 ; X64-AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX256-NEXT: retq
+;
   ret <32 x i8>
 }
@@ -194,8 +201,8 @@
 ;
 ; X32-AVX1-LABEL: allones_v16i16:
 ; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT: retl
 ;
 ; X32-AVX256-LABEL: allones_v16i16:
@@ -211,14 +218,15 @@
 ;
 ; X64-AVX1-LABEL: allones_v16i16:
 ; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT: retq
 ;
 ; X64-AVX256-LABEL: allones_v16i16:
 ; X64-AVX256: # BB#0:
 ; X64-AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX256-NEXT: retq
+;
   ret <16 x i16>
 }
@@ -231,8 +239,8 @@
 ;
 ; X32-AVX1-LABEL: allones_v8i32:
 ; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT: retl
 ;
 ; X32-AVX256-LABEL: allones_v8i32:
@@ -248,14 +256,15 @@
 ;
 ; X64-AVX1-LABEL: allones_v8i32:
 ; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT: retq
 ;
 ; X64-AVX256-LABEL: allones_v8i32:
 ; X64-AVX256: # BB#0:
 ; X64-AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX256-NEXT: retq
+;
   ret <8 x i32>
 }
@@ -268,8 +277,8 @@
 ;
 ; X32-AVX1-LABEL: allones_v4i64:
 ; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT: retl
 ;
 ; X32-AVX256-LABEL: allones_v4i64:
@@ -285,14 +294,15 @@
 ;
 ; X64-AVX1-LABEL: allones_v4i64:
 ; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT: retq
 ;
 ; X64-AVX256-LABEL: allones_v4i64:
 ; X64-AVX256: # BB#0:
 ; X64-AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX256-NEXT: retq
+;
   ret <4 x i64>
 }
@@ -305,8 +315,8 @@
 ;
 ; X32-AVX1-LABEL: allones_v4f64:
 ; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT: retl
 ;
 ; X32-AVX256-LABEL: allones_v4f64:
@@ -322,14 +332,15 @@
 ;
 ; X64-AVX1-LABEL: allones_v4f64:
 ; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT: retq
 ;
 ; X64-AVX256-LABEL: allones_v4f64:
 ; X64-AVX256: # BB#0:
 ; X64-AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX256-NEXT: retq
+;
   ret <4 x double>
 }
@@ -342,8 +353,8 @@
 ;
 ; X32-AVX1-LABEL: allones_v4f64_optsize:
 ; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT: retl
 ;
 ; X32-AVX256-LABEL: allones_v4f64_optsize:
@@ -359,14 +370,15 @@
 ;
 ; X64-AVX1-LABEL: allones_v4f64_optsize:
 ; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT: retq
 ;
 ; X64-AVX256-LABEL: allones_v4f64_optsize:
 ; X64-AVX256: # BB#0:
 ; X64-AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX256-NEXT: retq
+;
   ret <4 x double>
 }
@@ -379,8 +391,8 @@
 ;
 ; X32-AVX1-LABEL: allones_v8f32:
 ; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT: retl
 ;
 ; X32-AVX256-LABEL: allones_v8f32:
@@ -396,14 +408,15 @@
 ;
 ; X64-AVX1-LABEL: allones_v8f32:
 ; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT: retq
 ;
 ; X64-AVX256-LABEL: allones_v8f32:
 ; X64-AVX256: # BB#0:
 ; X64-AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX256-NEXT: retq
+;
   ret <8 x float>
 }
@@ -416,8 +429,8 @@
 ;
 ; X32-AVX1-LABEL: allones_v8f32_optsize:
 ; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT: retl
 ;
 ; X32-AVX256-LABEL: allones_v8f32_optsize:
@@ -433,14 +446,15 @@
 ;
 ; X64-AVX1-LABEL: allones_v8f32_optsize:
 ; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT: retq
 ;
 ; X64-AVX256-LABEL: allones_v8f32_optsize:
 ; X64-AVX256: # BB#0:
 ; X64-AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX256-NEXT: retq
+;
   ret <8 x float>
 }
@@ -455,8 +469,8 @@
 ;
 ; X32-AVX1-LABEL: allones_v64i8:
 ; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
 ; X32-AVX1-NEXT: retl
 ;
@@ -487,8 +501,8 @@
 ;
 ; X64-AVX1-LABEL: allones_v64i8:
 ; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
 ; X64-AVX1-NEXT: retq
 ;
@@ -508,6 +522,7 @@
 ; X64-SKX: # BB#0:
 ; X64-SKX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; X64-SKX-NEXT: retq
+;
   ret <64 x i8>
 }
@@ -522,8 +537,8 @@
 ;
 ; X32-AVX1-LABEL: allones_v32i16:
 ; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
 ; X32-AVX1-NEXT: retl
 ;
@@ -554,8 +569,8 @@
 ;
 ; X64-AVX1-LABEL: allones_v32i16:
 ; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
 ; X64-AVX1-NEXT: retq
 ;
@@ -575,6 +590,7 @@
 ; X64-SKX: # BB#0:
 ; X64-SKX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; X64-SKX-NEXT: retq
+;
   ret <32 x i16>
 }
@@ -589,8 +605,8 @@
 ;
 ; X32-AVX1-LABEL: allones_v16i32:
 ; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
 ; X32-AVX1-NEXT: retl
 ;
@@ -615,8 +631,8 @@
 ;
 ; X64-AVX1-LABEL: allones_v16i32:
 ; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
 ; X64-AVX1-NEXT: retq
 ;
@@ -630,6 +646,7 @@
 ; X64-AVX512: # BB#0:
 ; X64-AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; X64-AVX512-NEXT: retq
+;
   ret <16 x i32>
 }
@@ -644,8 +661,8 @@
 ;
 ; X32-AVX1-LABEL: allones_v8i64:
 ; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
 ; X32-AVX1-NEXT: retl
 ;
@@ -670,8 +687,8 @@
 ;
 ; X64-AVX1-LABEL: allones_v8i64:
 ; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
 ; X64-AVX1-NEXT: retq
 ;
@@ -685,6 +702,7 @@
 ; X64-AVX512: # BB#0:
 ; X64-AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; X64-AVX512-NEXT: retq
+;
   ret <8 x i64>
 }
@@ -699,8 +717,8 @@
 ;
 ; X32-AVX1-LABEL: allones_v8f64:
 ; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
 ; X32-AVX1-NEXT: retl
 ;
@@ -725,8 +743,8 @@
 ;
 ; X64-AVX1-LABEL: allones_v8f64:
 ; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
 ; X64-AVX1-NEXT: retq
 ;
@@ -740,6 +758,7 @@
 ; X64-AVX512: # BB#0:
 ; X64-AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; X64-AVX512-NEXT: retq
+;
   ret <8 x double>
 }
@@ -754,8 +773,8 @@
 ;
 ; X32-AVX1-LABEL: allones_v16f32:
 ; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
 ; X32-AVX1-NEXT: retl
 ;
@@ -780,8 +799,8 @@
 ;
 ; X64-AVX1-LABEL: allones_v16f32:
 ; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
 ; X64-AVX1-NEXT: retq
 ;
@@ -795,5 +814,6 @@
 ; X64-AVX512: # BB#0:
 ; X64-AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; X64-AVX512-NEXT: retq
+;
   ret <16 x float>
 }
Index: test/CodeGen/X86/avx-basic.ll
===================================================================
--- test/CodeGen/X86/avx-basic.ll
+++ test/CodeGen/X86/avx-basic.ll
@@ -34,8 +34,8 @@
 define void @ones([0 x float]* nocapture %RET, [0 x float]* nocapture %aFOO) nounwind {
 ; CHECK-LABEL: ones:
 ; CHECK: ## BB#0: ## %allocas
-; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; CHECK-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; CHECK-NEXT: vmovaps %ymm0, (%rdi)
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
@@ -51,8 +51,8 @@
 define void @ones2([0 x i32]* nocapture %RET, [0 x i32]* nocapture %aFOO) nounwind {
 ; CHECK-LABEL: ones2:
 ; CHECK: ## BB#0: ## %allocas
-; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; CHECK-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
 ; CHECK-NEXT: vmovaps %ymm0, (%rdi)
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
Index: test/CodeGen/X86/avx-cvt-3.ll
===================================================================
--- test/CodeGen/X86/avx-cvt-3.ll
+++ test/CodeGen/X86/avx-cvt-3.ll
@@ -48,16 +48,16 @@
 define <8 x float> @sitofp_insert_allbits_v8i32(<8 x i32> %a0) {
 ; X86-LABEL: sitofp_insert_allbits_v8i32:
 ; X86: # BB#0:
-; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
 ; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
 ; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: sitofp_insert_allbits_v8i32:
 ; X64: # BB#0:
-; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
 ; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
 ; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
 ; X64-NEXT: retq
@@ -72,16 +72,16 @@
 define <8 x float> @sitofp_shuffle_allbits_v8i32(<8 x i32> %a0) {
 ; X86-LABEL: sitofp_shuffle_allbits_v8i32:
 ; X86: # BB#0:
-; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
 ; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
 ; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: sitofp_shuffle_allbits_v8i32:
 ; X64: # BB#0:
-; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
 ; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
 ; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
 ; X64-NEXT: retq
@@ -95,8 +95,7 @@
 ; X86: # BB#0:
 ; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
 ; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
-; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
 ; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7]
 ; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X86-NEXT: movl $2, %eax
@@ -111,8 +110,7 @@
 ; X64: # BB#0:
 ; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
 ; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
-; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
 ; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7]
 ; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X64-NEXT: movl $2, %eax
Index: test/CodeGen/X86/avx-intrinsics-fast-isel.ll
===================================================================
--- test/CodeGen/X86/avx-intrinsics-fast-isel.ll
+++ test/CodeGen/X86/avx-intrinsics-fast-isel.ll
@@ -99,16 +99,16 @@
 define <4 x double> @test_mm256_andnot_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
 ; X32-LABEL: test_mm256_andnot_pd:
 ; X32: # BB#0:
-; X32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; X32-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2
+; X32-NEXT: vxorps %ymm2, %ymm2, %ymm2
+; X32-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
 ; X32-NEXT: vxorps %ymm2, %ymm0, %ymm0
 ; X32-NEXT: vandps %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_andnot_pd:
 ; X64: # BB#0:
-; X64-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; X64-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2
+; X64-NEXT: vxorps %ymm2, %ymm2, %ymm2
+; X64-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
 ; X64-NEXT: vxorps %ymm2, %ymm0, %ymm0
 ; X64-NEXT: vandps %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
Index: test/CodeGen/X86/pr28129.ll
===================================================================
--- test/CodeGen/X86/pr28129.ll
+++ test/CodeGen/X86/pr28129.ll
@@ -1,21 +1,22 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X86
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64

 define <4 x double> @cmp4f64_domain(<4 x double> %a) {
 ; X86-LABEL: cmp4f64_domain:
 ; X86: # BB#0:
-; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
 ; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: cmp4f64_domain:
 ; X64: # BB#0:
-; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
 ; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
+;
   %cmp = fcmp oeq <4 x double> zeroinitializer, zeroinitializer
   %sext = sext <4 x i1> %cmp to <4 x i64>
   %mask = bitcast <4 x i64> %sext to <4 x double>
@@ -26,17 +27,18 @@
 define <4 x double> @cmp4f64_domain_optsize(<4 x double> %a) optsize {
 ; X86-LABEL: cmp4f64_domain_optsize:
 ; X86: # BB#0:
-; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
 ; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: cmp4f64_domain_optsize:
 ; X64: # BB#0:
-; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
 ; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
+;
   %cmp = fcmp oeq <4 x double> zeroinitializer, zeroinitializer
   %sext = sext <4 x i1> %cmp to <4 x i64>
   %mask = bitcast <4 x i64> %sext to <4 x double>
@@ -47,17 +49,18 @@
 define <8 x float> @cmp8f32_domain(<8 x float> %a) {
 ; X86-LABEL: cmp8f32_domain:
 ; X86: # BB#0:
-; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
 ; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: cmp8f32_domain:
 ; X64: # BB#0:
-; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
 ; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
+;
   %cmp = fcmp oeq <8 x float> zeroinitializer, zeroinitializer
   %sext = sext <8 x i1> %cmp to <8 x i32>
   %mask = bitcast <8 x i32> %sext to <8 x float>
@@ -68,17 +71,18 @@
 define <8 x float> @cmp8f32_domain_optsize(<8 x float> %a) optsize {
 ; X86-LABEL: cmp8f32_domain_optsize:
 ; X86: # BB#0:
-; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
 ; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: cmp8f32_domain_optsize:
 ; X64: # BB#0:
-; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
 ; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
+;
   %cmp = fcmp oeq <8 x float> zeroinitializer, zeroinitializer
   %sext = sext <8 x i1> %cmp to <8 x i32>
   %mask = bitcast <8 x i32> %sext to <8 x float>
Index: test/CodeGen/X86/vector-pcmp.ll
===================================================================
--- test/CodeGen/X86/vector-pcmp.ll
+++ test/CodeGen/X86/vector-pcmp.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
@@ -19,6 +19,7 @@
 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
+;
   %sign = ashr <16 x i8> %x,
   %not = xor <16 x i8> %sign,
   ret <16 x i8> %not
@@ -36,6 +37,7 @@
 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
+;
   %sign = ashr <8 x i16> %x,
   %not = xor <8 x i16> %sign,
   ret <8 x i16> %not
@@ -53,6 +55,7 @@
 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
+;
   %sign = ashr <4 x i32> %x,
   %not = xor <4 x i32> %sign,
   ret <4 x i32> %not
@@ -78,6 +81,7 @@
 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
+;
   %sign = ashr <2 x i64> %x,
   %not = xor <2 x i64> %sign,
   ret <2 x i64> %not
@@ -128,6 +132,7 @@
 ; AVX2-NEXT: vmovq %xmm0, %rax
 ; AVX2-NEXT: vpextrq $1, %xmm0, %rdx
 ; AVX2-NEXT: retq
+;
   %sign = ashr <1 x i128> %x,
   %not = xor <1 x i128> %sign,
   ret <1 x i128> %not
@@ -148,8 +153,8 @@
 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
 ; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
@@ -158,6 +163,7 @@
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
 ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
+;
   %sign = ashr <32 x i8> %x,
   %not = xor <32 x i8> %sign,
   ret <32 x i8> %not
@@ -177,8 +183,8 @@
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
 ; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
@@ -187,6 +193,7 @@
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
+;
   %sign = ashr <16 x i16> %x,
   %not = xor <16 x i16> %sign,
   ret <16 x i16> %not
@@ -206,8 +213,8 @@
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
 ; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
@@ -216,6 +223,7 @@
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
+;
   %sign = ashr <8 x i32> %x,
   %not = xor <8 x i32> %sign,
   ret <8 x i32> %not
@@ -247,8 +255,8 @@
 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
 ; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
@@ -257,6 +265,7 @@
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
+;
   %sign = ashr <4 x i64> %x,
   %not = xor <4 x i64> %sign,
   ret <4 x i64> %not
@@ -274,6 +283,7 @@
 ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
+;
   %cmp = icmp eq <16 x i8> %a, %b
   %zext = zext <16 x i1> %cmp to <16 x i8>
   ret <16 x i8> %zext
@@ -303,6 +313,7 @@
 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0
 ; AVX2-NEXT: retq
+;
   %cmp = icmp eq <16 x i16> %a, %b
   %zext = zext <16 x i1> %cmp to <16 x i16>
   ret <16 x i16> %zext
@@ -320,6 +331,7 @@
 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: vpsrld $31, %xmm0, %xmm0
 ; AVX-NEXT: retq
+;
   %cmp = icmp eq <4 x i32> %a, %b
   %zext = zext <4 x i1> %cmp to <4 x i32>
   ret <4 x i32> %zext
@@ -362,6 +374,7 @@
 ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0
 ; AVX2-NEXT: retq
+;
   %cmp = icmp eq <4 x i64> %a, %b
   %zext = zext <4 x i1> %cmp to <4 x i64>
   ret <4 x i64> %zext
@@ -392,6 +405,7 @@
 ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-NEXT: retq
+;
   %cmp = icmp sgt <32 x i8> %a, %b
   %zext = zext <32 x i1> %cmp to <32 x i8>
   ret <32 x i8> %zext
@@ -409,6 +423,7 @@
 ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: vpsrlw $15, %xmm0, %xmm0
 ; AVX-NEXT: retq
+;
   %cmp = icmp sgt <8 x i16> %a, %b
   %zext = zext <8 x i1> %cmp to <8 x i16>
   ret <8 x i16> %zext
@@ -438,6 +453,7 @@
 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
 ; AVX2-NEXT: retq
+;
   %cmp = icmp sgt <8 x i32> %a, %b
   %zext = zext <8 x i1> %cmp to <8 x i32>
   ret <8 x i32> %zext
@@ -471,6 +487,7 @@
 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: vpsrlq $63, %xmm0, %xmm0
 ; AVX-NEXT: retq
+;
   %cmp = icmp sgt <2 x i64> %a, %b
   %zext = zext <2 x i1> %cmp to <2 x i64>
   ret <2 x i64> %zext
Index: test/CodeGen/X86/xop-intrinsics-fast-isel.ll
===================================================================
--- test/CodeGen/X86/xop-intrinsics-fast-isel.ll
+++ test/CodeGen/X86/xop-intrinsics-fast-isel.ll
@@ -499,8 +499,8 @@
 define <4 x i64> @test_mm256_cmov_si256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
 ; X32-LABEL: test_mm256_cmov_si256:
 ; X32: # BB#0:
-; X32-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; X32-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3
+; X32-NEXT: vxorps %ymm3, %ymm3, %ymm3
+; X32-NEXT: vcmptrueps %ymm3, %ymm3, %ymm3
 ; X32-NEXT: vxorps %ymm3, %ymm2, %ymm3
 ; X32-NEXT: vandps %ymm2, %ymm0, %ymm0
 ; X32-NEXT: vandps %ymm3, %ymm1, %ymm1
@@ -509,8 +509,8 @@
 ;
 ; X64-LABEL: test_mm256_cmov_si256:
 ; X64: # BB#0:
-; X64-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; X64-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3
+; X64-NEXT: vxorps %ymm3, %ymm3, %ymm3
+; X64-NEXT: vcmptrueps %ymm3, %ymm3, %ymm3
 ; X64-NEXT: vxorps %ymm3, %ymm2, %ymm3
 ; X64-NEXT: vandps %ymm2, %ymm0, %ymm0
 ; X64-NEXT: vandps %ymm3, %ymm1, %ymm1
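
===================================================================
Addendum (reviewer note, not part of the patch): both the AVX1_SETALLONES
expansion and the new HasAVX1Only pattern hardcode the compare immediate 0xf.
That value is the always-true packed-compare predicate (exposed in
immintrin.h as _CMP_TRUE_UQ), so the compare writes all ones into every lane
no matter what its inputs hold, which is why undef or zeroed fake inputs are
good enough. Below is a minimal standalone C++ sketch of the same idiom,
written for this note (the variable names are ours, not from the patch);
build with -mavx:

    // Materialize 256 bits of ones the way the new AVX1 lowering does:
    // zero a YMM register (vxorps), then compare it against itself with
    // the always-true predicate 0xf (vcmptrueps).
    #include <immintrin.h>
    #include <cstdio>

    int main() {
      __m256 zero = _mm256_setzero_ps();                      // vxorps
      __m256 ones = _mm256_cmp_ps(zero, zero, _CMP_TRUE_UQ);  // vcmptrueps
      // Every lane is all-ones, so all 8 sign bits are set.
      int mask = _mm256_movemask_ps(ones);
      std::printf("movemask = %#x\n", mask);                  // expect 0xff
      return mask == 0xff ? 0 : 1;
    }

The trade-off visible in the tests: AVX1 targets still spend two instructions
(hence the FIXME about the fake inputs), but vxorps+vcmptrueps stays in the
256-bit floating-point domain, whereas the old vpcmpeqd+vinsertf128 sequence
built the constant in a 128-bit integer register and then widened it. The
AVX1_SETALLONES pseudo is additionally guarded by OptForMinSize so that, when
optimizing for size, the all-ones value can still be folded into a
constant-pool load through the X86InstrInfo.cpp changes above.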