Index: lib/Target/X86/X86InstrInfo.h
===================================================================
--- lib/Target/X86/X86InstrInfo.h
+++ lib/Target/X86/X86InstrInfo.h
@@ -490,8 +490,12 @@
   std::pair<uint16_t, uint16_t>
   getExecutionDomain(const MachineInstr &MI) const override;
 
+  uint16_t getExecutionDomainCustom(const MachineInstr &MI) const;
+
   void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override;
 
+  bool setExecutionDomainCustom(MachineInstr &MI, unsigned Domain) const;
+
   unsigned getPartialRegUpdateClearance(const MachineInstr &MI, unsigned OpNum,
                                         const TargetRegisterInfo *TRI) const override;
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -9694,8 +9694,6 @@
   { X86::VBROADCASTSDYrr,  X86::VBROADCASTSDYrr,  X86::VPBROADCASTQYrr},
   { X86::VBROADCASTSDYrm,  X86::VBROADCASTSDYrm,  X86::VPBROADCASTQYrm},
   { X86::VBROADCASTF128,   X86::VBROADCASTF128,   X86::VBROADCASTI128 },
-  { X86::VBLENDPSrri,      X86::VBLENDPSrri,      X86::VPBLENDDrri },
-  { X86::VBLENDPSrmi,      X86::VBLENDPSrmi,      X86::VPBLENDDrmi },
   { X86::VBLENDPSYrri,     X86::VBLENDPSYrri,     X86::VPBLENDDYrri },
   { X86::VBLENDPSYrmi,     X86::VBLENDPSYrmi,     X86::VPBLENDDYrmi },
   { X86::VPERMILPSYmi,     X86::VPERMILPSYmi,     X86::VPSHUFDYmi },
@@ -9949,6 +9947,24 @@
     X86::VPXORQZrmbkz,     X86::VPXORDZrmbkz  },
 };
 
+// NOTE: These should only be used by the custom domain methods.
+static const uint16_t ReplaceableCustomInstrs[][3] = {
+  //PackedSingle        PackedDouble        PackedInt
+  { X86::BLENDPSrmi,    X86::BLENDPDrmi,    X86::PBLENDWrmi   },
+  { X86::BLENDPSrri,    X86::BLENDPDrri,    X86::PBLENDWrri   },
+  { X86::VBLENDPSrmi,   X86::VBLENDPDrmi,   X86::VPBLENDWrmi  },
+  { X86::VBLENDPSrri,   X86::VBLENDPDrri,   X86::VPBLENDWrri  },
+  { X86::VBLENDPSYrmi,  X86::VBLENDPDYrmi,  X86::VPBLENDWYrmi },
+  { X86::VBLENDPSYrri,  X86::VBLENDPDYrri,  X86::VPBLENDWYrri },
+};
+static const uint16_t ReplaceableCustomAVX2Instrs[][3] = {
+  //PackedSingle        PackedDouble        PackedInt
+  { X86::VBLENDPSrmi,   X86::VBLENDPDrmi,   X86::VPBLENDDrmi  },
+  { X86::VBLENDPSrri,   X86::VBLENDPDrri,   X86::VPBLENDDrri  },
+  { X86::VBLENDPSYrmi,  X86::VBLENDPDYrmi,  X86::VPBLENDDYrmi },
+  { X86::VBLENDPSYrri,  X86::VBLENDPDYrri,  X86::VPBLENDDYrri },
+};
+
 // FIXME: Some shuffle and unpack instructions have equivalents in different
 // domains, but they require a bit more work than just switching opcodes.
@@ -9969,13 +9985,181 @@
   return nullptr;
 }
 
+// Helper to attempt to widen/narrow blend masks.
+static bool AdjustBlendMask(unsigned OldMask, unsigned OldWidth,
+                            unsigned NewWidth, unsigned *pNewMask = nullptr) {
+  assert(((OldWidth % NewWidth) == 0 || (NewWidth % OldWidth) == 0) &&
+         "Illegal blend mask scale");
+  unsigned NewMask = 0;
+
+  if ((OldWidth % NewWidth) == 0) {
+    unsigned Scale = OldWidth / NewWidth;
+    unsigned SubMask = (1u << Scale) - 1;
+    for (unsigned i = 0; i != NewWidth; ++i) {
+      unsigned Sub = (OldMask >> (i * Scale)) & SubMask;
+      if (Sub == SubMask)
+        NewMask |= (1u << i);
+      else if (Sub != 0x0)
+        return false;
+    }
+  } else {
+    unsigned Scale = NewWidth / OldWidth;
+    unsigned SubMask = (1u << Scale) - 1;
+    for (unsigned i = 0; i != OldWidth; ++i) {
+      if (OldMask & (1 << i)) {
+        NewMask |= (SubMask << (i * Scale));
+      }
+    }
+  }
+
+  if (pNewMask)
+    *pNewMask = NewMask;
+  return true;
+}
+
+uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const {
+  unsigned Opcode = MI.getOpcode();
+  unsigned NumOperands = MI.getNumOperands();
+
+  auto GetBlendDomains = [&](unsigned ImmWidth, bool Is256) {
+    uint16_t validDomains = 0;
+    if (MI.getOperand(NumOperands - 1).isImm()) {
+      unsigned Imm = MI.getOperand(NumOperands - 1).getImm();
+      if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4))
+        validDomains |= 0x2; // PackedSingle
+      if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2))
+        validDomains |= 0x4; // PackedDouble
+      if (!Is256 || Subtarget.hasAVX2())
+        validDomains |= 0x8; // PackedInt
+    }
+    return validDomains;
+  };
+
+  switch (Opcode) {
+  case X86::BLENDPDrmi:
+  case X86::BLENDPDrri:
+  case X86::VBLENDPDrmi:
+  case X86::VBLENDPDrri:
+    return GetBlendDomains(2, false);
+  case X86::VBLENDPDYrmi:
+  case X86::VBLENDPDYrri:
+    return GetBlendDomains(4, true);
+  case X86::BLENDPSrmi:
+  case X86::BLENDPSrri:
+  case X86::VBLENDPSrmi:
+  case X86::VBLENDPSrri:
+  case X86::VPBLENDDrmi:
+  case X86::VPBLENDDrri:
+    return GetBlendDomains(4, false);
+  case X86::VBLENDPSYrmi:
+  case X86::VBLENDPSYrri:
+  case X86::VPBLENDDYrmi:
+  case X86::VPBLENDDYrri:
+    return GetBlendDomains(8, true);
+  case X86::PBLENDWrmi:
+  case X86::PBLENDWrri:
+  case X86::VPBLENDWrmi:
+  case X86::VPBLENDWrri:
+  // Treat VPBLENDWY as a 128-bit vector as it repeats the lo/hi masks.
+  case X86::VPBLENDWYrmi:
+  case X86::VPBLENDWYrri:
+    return GetBlendDomains(8, false);
+  }
+  return 0;
+}
+
+bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
+                                            unsigned Domain) const {
+  assert(Domain > 0 && Domain < 4 && "Invalid execution domain");
+  uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+  assert(dom && "Not an SSE instruction");
+
+  unsigned Opcode = MI.getOpcode();
+  unsigned NumOperands = MI.getNumOperands();
+
+  auto SetBlendDomain = [&](unsigned ImmWidth, bool Is256,
+                            bool Repeat = false) {
+    if (MI.getOperand(NumOperands - 1).isImm()) {
+      assert((!Repeat || ImmWidth == 16) && "Illegal VPBLENDWY mask");
+      unsigned Imm = MI.getOperand(NumOperands - 1).getImm();
+      Imm = (Repeat ? ((Imm & 255) << 8) | (Imm & 255) : Imm);
+      unsigned NewImm = Imm;
+
+      const uint16_t *table = lookup(Opcode, dom, ReplaceableCustomInstrs);
+      if (!table)
+        table = lookup(Opcode, dom, ReplaceableCustomAVX2Instrs);
+
+      if (Domain == 1) { // PackedSingle
+        AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
+      } else if (Domain == 2) { // PackedDouble
+        AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2, &NewImm);
+      } else if (Domain == 3) { // PackedInt
+        if (Subtarget.hasAVX2()) {
+          if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm))
+            table = lookup(Opcode, dom, ReplaceableCustomAVX2Instrs);
+          else {
+            AdjustBlendMask(Imm, ImmWidth, Is256 ? 16 : 8, &NewImm);
+            assert((!Is256 || ((NewImm >> 8) & 255) == (NewImm & 255)) &&
+                   "Illegal VPBLENDWY mask");
+            NewImm &= 255;
+          }
+        } else {
+          assert(!Is256 && "128-bit vector expected");
+          AdjustBlendMask(Imm, ImmWidth, 8, &NewImm);
+        }
+      }
+
+      assert(table && table[Domain - 1] && "Unknown domain op");
+      MI.setDesc(get(table[Domain - 1]));
+      MI.getOperand(NumOperands - 1).setImm(NewImm);
+    }
+    return true;
+  };
+
+  switch (Opcode) {
+  case X86::BLENDPDrmi:
+  case X86::BLENDPDrri:
+  case X86::VBLENDPDrmi:
+  case X86::VBLENDPDrri:
+    return SetBlendDomain(2, false);
+  case X86::VBLENDPDYrmi:
+  case X86::VBLENDPDYrri:
+    return SetBlendDomain(4, true);
+  case X86::BLENDPSrmi:
+  case X86::BLENDPSrri:
+  case X86::VBLENDPSrmi:
+  case X86::VBLENDPSrri:
+  case X86::VPBLENDDrmi:
+  case X86::VPBLENDDrri:
+    return SetBlendDomain(4, false);
+  case X86::VBLENDPSYrmi:
+  case X86::VBLENDPSYrri:
+  case X86::VPBLENDDYrmi:
+  case X86::VPBLENDDYrri:
+    return SetBlendDomain(8, true);
+  case X86::PBLENDWrmi:
+  case X86::PBLENDWrri:
+  case X86::VPBLENDWrmi:
+  case X86::VPBLENDWrri:
+    return SetBlendDomain(8, false);
+  case X86::VPBLENDWYrmi:
+  case X86::VPBLENDWYrri:
+    return SetBlendDomain(16, true, true);
+  }
+  return false;
+}
+
 std::pair<uint16_t, uint16_t>
 X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
   uint16_t domain = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
   unsigned opcode = MI.getOpcode();
   uint16_t validDomains = 0;
   if (domain) {
-    if (lookup(MI.getOpcode(), domain, ReplaceableInstrs)) {
+    // Attempt to match for custom instructions.
+    validDomains = getExecutionDomainCustom(MI);
+    if (validDomains)
+      return std::make_pair(domain, validDomains);
+
+    if (lookup(opcode, domain, ReplaceableInstrs)) {
       validDomains = 0xe;
     } else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
       validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
@@ -10007,6 +10191,11 @@
   assert(Domain>0 && Domain<4 && "Invalid execution domain");
   uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
   assert(dom && "Not an SSE instruction");
+
+  // Attempt to match for custom instructions.
+  if (setExecutionDomainCustom(MI, Domain))
+    return;
+
   const uint16_t *table = lookup(MI.getOpcode(), dom, ReplaceableInstrs);
   if (!table) { // try the other table
     assert((Subtarget.hasAVX2() || Domain < 3) &&
Index: test/CodeGen/X86/avx-cast.ll
===================================================================
--- test/CodeGen/X86/avx-cast.ll
+++ test/CodeGen/X86/avx-cast.ll
@@ -21,8 +21,8 @@
 ; AVX-LABEL: castB:
 ; AVX:       ## %bb.0:
 ; AVX-NEXT:    ## kill: def %xmm0 killed %xmm0 def %ymm0
-; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX-NEXT:    retq
   %shuffle.i = shufflevector <2 x double> %m, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x double> %shuffle.i
@@ -31,19 +31,12 @@
 ; AVX2 is needed for integer types.
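Before the remaining test updates, here is a small standalone sketch of the widen/narrow rule that AdjustBlendMask applies to the blend immediates, so the check-line churn below can be verified by hand. It is illustrative only: the name rescaleBlendMask, the sample immediates, and the C++17 std::optional return type are choices made for this sketch and are not part of the patch or of the LLVM tree.

#include <cassert>
#include <cstdio>
#include <optional>

// Rescale a blend control mask from OldWidth lanes to NewWidth lanes.
// Narrowing (e.g. blendps -> blendpd) only succeeds when every group of
// OldWidth/NewWidth source bits is all-zero or all-one; widening
// (e.g. blendpd -> blendps/pblendw) always succeeds.
static std::optional<unsigned> rescaleBlendMask(unsigned OldMask,
                                                unsigned OldWidth,
                                                unsigned NewWidth) {
  assert((OldWidth % NewWidth) == 0 || (NewWidth % OldWidth) == 0);
  unsigned NewMask = 0;
  if ((OldWidth % NewWidth) == 0) { // narrowing
    unsigned Scale = OldWidth / NewWidth;
    unsigned SubMask = (1u << Scale) - 1;
    for (unsigned i = 0; i != NewWidth; ++i) {
      unsigned Sub = (OldMask >> (i * Scale)) & SubMask;
      if (Sub == SubMask)
        NewMask |= 1u << i;
      else if (Sub != 0)
        return std::nullopt; // a wide lane is only partially selected
    }
  } else { // widening
    unsigned Scale = NewWidth / OldWidth;
    unsigned SubMask = (1u << Scale) - 1;
    for (unsigned i = 0; i != OldWidth; ++i)
      if (OldMask & (1u << i))
        NewMask |= SubMask << (i * Scale);
  }
  return NewMask;
}

int main() {
  // blendpd $0x2 (2 doubles) == blendps $0xC (4 floats) == pblendw $0xF0.
  printf("0x%X\n", *rescaleBlendMask(0x2, 2, 4)); // prints 0xC
  printf("0x%X\n", *rescaleBlendMask(0x2, 2, 8)); // prints 0xF0
  // blendps $0x6 selects only half of double lane 0 and half of lane 1,
  // so it has no blendpd equivalent.
  printf("%d\n", (int)rescaleBlendMask(0x6, 4, 2).has_value()); // prints 0
  // vpblendw $0xF on a ymm repeats its mask per 128-bit half (0x0F0F over
  // 16 words), which narrows to vblendps $0x33 over 8 floats.
  printf("0x%X\n", *rescaleBlendMask(0x0F0F, 16, 8)); // prints 0x33
  return 0;
}

The castB change above is exactly this rescaling: the 4-lane double mask 0xC widens to the 8-lane single mask 0xF0, and the same rule accounts for the immediate rewrites in the tests that follow.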
define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp { -; AVX1-LABEL: castC: -; AVX1: ## %bb.0: -; AVX1-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0 -; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] -; AVX1-NEXT: retq -; -; AVX2-LABEL: castC: -; AVX2: ## %bb.0: -; AVX2-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0 -; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] -; AVX2-NEXT: retq +; AVX-LABEL: castC: +; AVX: ## %bb.0: +; AVX-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0 +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; AVX-NEXT: retq %shuffle.i = shufflevector <2 x i64> %m, <2 x i64> zeroinitializer, <4 x i32> ret <4 x i64> %shuffle.i } Index: test/CodeGen/X86/avx-insertelt.ll =================================================================== --- test/CodeGen/X86/avx-insertelt.ll +++ test/CodeGen/X86/avx-insertelt.ll @@ -16,7 +16,7 @@ ; ALL-LABEL: insert_f64: ; ALL: # %bb.0: ; ALL-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1 -; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3] +; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7] ; ALL-NEXT: retq %i0 = insertelement <4 x double> %y, double %f, i32 0 ret <4 x double> %i0 Index: test/CodeGen/X86/avx-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/avx-intrinsics-fast-isel.ll +++ test/CodeGen/X86/avx-intrinsics-fast-isel.ll @@ -141,12 +141,12 @@ define <4 x double> @test_mm256_blend_pd(<4 x double> %a0, <4 x double> %a1) nounwind { ; X32-LABEL: test_mm256_blend_pd: ; X32: # %bb.0: -; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] +; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7] ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_blend_pd: ; X64: # %bb.0: -; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] +; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7] ; X64-NEXT: retq %res = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> ret <4 x double> %res @@ -1044,13 +1044,13 @@ ; X32-LABEL: test_mm256_insertf128_pd: ; X32: # %bb.0: ; X32-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1 -; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] +; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_insertf128_pd: ; X64: # %bb.0: ; X64-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1 -; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] +; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; X64-NEXT: retq %ext = shufflevector <2 x double> %a1, <2 x double> %a1, <4 x i32> %res = shufflevector <4 x double> %a0, <4 x double> %ext, <4 x i32> @@ -1076,13 +1076,13 @@ ; X32-LABEL: test_mm256_insertf128_si256: ; X32: # %bb.0: ; X32-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1 -; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] +; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_insertf128_si256: ; X64: # %bb.0: ; X64-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1 -; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] +; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; X64-NEXT: retq %ext = shufflevector <2 x i64> %a1, <2 x i64> %a1, <4 x i32> %res = shufflevector <4 x i64> %a0, <4 x i64> %ext, <4 x i32> Index: test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll 
=================================================================== --- test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll +++ test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll @@ -40,7 +40,7 @@ ; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_2: ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1 -; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] +; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; CHECK-NEXT: ret{{[l|q]}} %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 2) ret <8 x i32> %res @@ -133,7 +133,7 @@ define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) { ; CHECK-LABEL: test_x86_avx_blend_pd_256: ; CHECK: # %bb.0: -; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3] +; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] ; CHECK-NEXT: ret{{[l|q]}} %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1] ret <4 x double> %res @@ -188,7 +188,7 @@ define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: test_x86_sse41_blendpd: ; CHECK: # %bb.0: -; CHECK-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; CHECK-NEXT: ret{{[l|q]}} %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 2) ; <<2 x double>> [#uses=1] ret <2 x double> %res Index: test/CodeGen/X86/avx-vperm2x128.ll =================================================================== --- test/CodeGen/X86/avx-vperm2x128.ll +++ test/CodeGen/X86/avx-vperm2x128.ll @@ -37,7 +37,7 @@ define <8 x float> @shuffle_v8f32_0123cdef(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; ALL-LABEL: shuffle_v8f32_0123cdef: ; ALL: # %bb.0: # %entry -; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] +; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; ALL-NEXT: retq entry: %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> @@ -380,8 +380,8 @@ define <4 x double> @shuffle_v4f64_zz23(<4 x double> %a) { ; ALL-LABEL: shuffle_v4f64_zz23: ; ALL: # %bb.0: -; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] +; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; ALL-NEXT: retq %s = shufflevector <4 x double> %a, <4 x double> , <4 x i32> ret <4 x double> %s @@ -389,8 +389,8 @@ define <4 x double> @shuffle_v4f64_zz23_optsize(<4 x double> %a) optsize { ; ALL-LABEL: shuffle_v4f64_zz23_optsize: ; ALL: # %bb.0: -; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] +; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; ALL-NEXT: retq %s = shufflevector <4 x double> %a, <4 x double> , <4 x i32> ret <4 x double> %s @@ -416,8 +416,8 @@ define <4 x double> @shuffle_v4f64_zz67(<4 x double> %a) { ; ALL-LABEL: shuffle_v4f64_zz67: ; ALL: # %bb.0: -; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] +; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; ALL-NEXT: retq %s = shufflevector <4 x double> , <4 x double> %a, <4 x i32> ret <4 x double> %s @@ -425,8 +425,8 @@ define <4 x double> @shuffle_v4f64_zz67_optsize(<4 x double> %a) optsize { ; ALL-LABEL: shuffle_v4f64_zz67_optsize: ; ALL: # %bb.0: -; ALL-NEXT: 
vxorpd %xmm1, %xmm1, %xmm1 -; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] +; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; ALL-NEXT: retq %s = shufflevector <4 x double> , <4 x double> %a, <4 x i32> ret <4 x double> %s @@ -435,8 +435,8 @@ define <4 x double> @shuffle_v4f64_01zz(<4 x double> %a) { ; ALL-LABEL: shuffle_v4f64_01zz: ; ALL: # %bb.0: -; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] +; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; ALL-NEXT: retq %s = shufflevector <4 x double> %a, <4 x double> , <4 x i32> ret <4 x double> %s @@ -444,8 +444,8 @@ define <4 x double> @shuffle_v4f64_01zz_optsize(<4 x double> %a) optsize { ; ALL-LABEL: shuffle_v4f64_01zz_optsize: ; ALL: # %bb.0: -; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] +; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; ALL-NEXT: retq %s = shufflevector <4 x double> %a, <4 x double> , <4 x i32> ret <4 x double> %s @@ -471,8 +471,8 @@ define <4 x double> @shuffle_v4f64_45zz(<4 x double> %a) { ; ALL-LABEL: shuffle_v4f64_45zz: ; ALL: # %bb.0: -; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] +; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; ALL-NEXT: retq %s = shufflevector <4 x double> , <4 x double> %a, <4 x i32> ret <4 x double> %s @@ -480,8 +480,8 @@ define <4 x double> @shuffle_v4f64_45zz_optsize(<4 x double> %a) optsize { ; ALL-LABEL: shuffle_v4f64_45zz_optsize: ; ALL: # %bb.0: -; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] +; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; ALL-NEXT: retq %s = shufflevector <4 x double> , <4 x double> %a, <4 x i32> ret <4 x double> %s @@ -511,7 +511,7 @@ ; AVX1: # %bb.0: ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero ; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] +; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuffle_v4i64_67zz: Index: test/CodeGen/X86/avx512-shuffles/partial_permute.ll =================================================================== --- test/CodeGen/X86/avx512-shuffles/partial_permute.ll +++ test/CodeGen/X86/avx512-shuffles/partial_permute.ll @@ -1115,9 +1115,9 @@ define <4 x i32> @test_8xi32_to_4xi32_perm_mask3(<8 x i32> %vec) { ; CHECK-LABEL: test_8xi32_to_4xi32_perm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,1] +; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1 +; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,1] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <4 x i32> @@ -3087,9 +3087,9 @@ define <4 x float> @test_masked_8xfloat_to_4xfloat_perm_mem_mask1(<8 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_to_4xfloat_perm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm2 -; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3 -; CHECK-NEXT: vpblendd {{.*#+}} xmm2 = 
xmm3[0,1,2],xmm2[3] +; CHECK-NEXT: vmovaps (%rdi), %ymm2 +; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3 +; CHECK-NEXT: vblendps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3] ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = xmm2[2,3,3,2] @@ -3105,9 +3105,9 @@ define <4 x float> @test_masked_z_8xfloat_to_4xfloat_perm_mem_mask1(<8 x float>* %vp, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_to_4xfloat_perm_mem_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm1 -; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2 -; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[3] +; CHECK-NEXT: vmovaps (%rdi), %ymm1 +; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2 +; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[3] ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm0, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm1[2,3,3,2] @@ -3401,10 +3401,10 @@ define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask1(<16 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,2] -; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm3[0],xmm0[1],xmm3[2],xmm0[3] +; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,2] +; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm3[0],xmm0[1],xmm3[2],xmm0[3] ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} @@ -3419,10 +3419,10 @@ define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask1(<16 x float> %vec, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mask1: ; CHECK: # %bb.0: -; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,2] -; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3] +; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,2] +; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3] ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} @@ -3481,11 +3481,11 @@ define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask3(<16 x float> %vec, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = [0,2,4,6,4,6,6,7] -; CHECK-NEXT: vpermd %ymm0, %ymm3, %ymm3 -; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,3,3] -; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm3[1],xmm0[2],xmm3[3] +; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [0,2,4,6,4,6,6,7] +; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm3 +; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 +; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3] +; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm3[1],xmm0[2],xmm3[3] ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1 ; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} @@ -3500,11 +3500,11 @@ define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask3(<16 x float> %vec, <4 x float> %mask) { ; CHECK-LABEL: 
test_masked_z_16xfloat_to_4xfloat_perm_mask3: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7] -; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm2 -; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,3,3] -; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7] +; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm2 +; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 +; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3] +; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1 ; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} @@ -3703,12 +3703,12 @@ define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) { ; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2 -; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3 -; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,2,3,3] -; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm2 -; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[3,1,2,3] -; CHECK-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3] +; CHECK-NEXT: vmovaps (%rdi), %zmm2 +; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm3 +; CHECK-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[0,2,3,3] +; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm2 +; CHECK-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[3,1,2,3] +; CHECK-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3] ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1 ; CHECK-NEXT: vmovaps %xmm2, %xmm0 {%k1} @@ -3724,12 +3724,12 @@ define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mem_mask0(<16 x float>* %vp, <4 x float> %mask) { ; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mem_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1 -; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2 -; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,3,3] -; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1 -; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[3,1,2,3] -; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2],xmm1[3] +; CHECK-NEXT: vmovaps (%rdi), %zmm1 +; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2 +; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,3,3] +; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm1 +; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3] +; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2],xmm1[3] ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm0, %k1 ; CHECK-NEXT: vmovaps %xmm1, %xmm0 {%k1} {z} Index: test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -3256,8 +3256,8 @@ define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256: ; CHECK: ## %bb.0: -; CHECK-NEXT: vblendpd $12, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c] -; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3] +; CHECK-NEXT: vblendps $240, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0] +; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovaps %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0xd0] ; CHECK-NEXT: vmovaps %ymm0, %ymm1 
{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0xc8] Index: test/CodeGen/X86/buildvec-insertvec.ll =================================================================== --- test/CodeGen/X86/buildvec-insertvec.ll +++ test/CodeGen/X86/buildvec-insertvec.ll @@ -72,7 +72,7 @@ ; ; SSE41-LABEL: test_negative_zero_2: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3] ; SSE41-NEXT: retq entry: %0 = extractelement <2 x double> %A, i32 0 Index: test/CodeGen/X86/clear_upper_vector_element_bits.ll =================================================================== --- test/CodeGen/X86/clear_upper_vector_element_bits.ll +++ test/CodeGen/X86/clear_upper_vector_element_bits.ll @@ -16,21 +16,15 @@ ; ; SSE42-LABEL: _clearupper2xi64a: ; SSE42: # %bb.0: -; SSE42-NEXT: pxor %xmm1, %xmm1 -; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; SSE42-NEXT: xorps %xmm1, %xmm1 +; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; SSE42-NEXT: retq ; -; AVX1-LABEL: _clearupper2xi64a: -; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] -; AVX1-NEXT: retq -; -; AVX2-LABEL: _clearupper2xi64a: -; AVX2: # %bb.0: -; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX2-NEXT: retq +; AVX-LABEL: _clearupper2xi64a: +; AVX: # %bb.0: +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; AVX-NEXT: retq %x0 = extractelement <2 x i64> %0, i32 0 %x1 = extractelement <2 x i64> %0, i32 1 %trunc0 = trunc i64 %x0 to i32 @@ -52,9 +46,9 @@ ; ; SSE42-LABEL: _clearupper4xi64a: ; SSE42: # %bb.0: -; SSE42-NEXT: pxor %xmm2, %xmm2 -; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] -; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] +; SSE42-NEXT: xorps %xmm2, %xmm2 +; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] ; SSE42-NEXT: retq ; ; AVX-LABEL: _clearupper4xi64a: @@ -673,21 +667,15 @@ ; ; SSE42-LABEL: _clearupper2xi64b: ; SSE42: # %bb.0: -; SSE42-NEXT: pxor %xmm1, %xmm1 -; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; SSE42-NEXT: xorps %xmm1, %xmm1 +; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; SSE42-NEXT: retq ; -; AVX1-LABEL: _clearupper2xi64b: -; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] -; AVX1-NEXT: retq -; -; AVX2-LABEL: _clearupper2xi64b: -; AVX2: # %bb.0: -; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX2-NEXT: retq +; AVX-LABEL: _clearupper2xi64b: +; AVX: # %bb.0: +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; AVX-NEXT: retq %x32 = bitcast <2 x i64> %0 to <4 x i32> %r0 = insertelement <4 x i32> %x32, i32 zeroinitializer, i32 1 %r1 = insertelement <4 x i32> %r0, i32 zeroinitializer, i32 3 @@ -705,9 +693,9 @@ ; ; SSE42-LABEL: _clearupper4xi64b: ; SSE42: # %bb.0: -; SSE42-NEXT: pxor %xmm2, %xmm2 -; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] -; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] +; SSE42-NEXT: xorps 
%xmm2, %xmm2 +; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] ; SSE42-NEXT: retq ; ; AVX-LABEL: _clearupper4xi64b: @@ -1639,21 +1627,15 @@ ; ; SSE42-LABEL: _clearupper2xi64c: ; SSE42: # %bb.0: -; SSE42-NEXT: pxor %xmm1, %xmm1 -; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; SSE42-NEXT: xorps %xmm1, %xmm1 +; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; SSE42-NEXT: retq ; -; AVX1-LABEL: _clearupper2xi64c: -; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] -; AVX1-NEXT: retq -; -; AVX2-LABEL: _clearupper2xi64c: -; AVX2: # %bb.0: -; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX2-NEXT: retq +; AVX-LABEL: _clearupper2xi64c: +; AVX: # %bb.0: +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; AVX-NEXT: retq %r = and <2 x i64> , %0 ret <2 x i64> %r } @@ -1668,9 +1650,9 @@ ; ; SSE42-LABEL: _clearupper4xi64c: ; SSE42: # %bb.0: -; SSE42-NEXT: pxor %xmm2, %xmm2 -; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] -; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] +; SSE42-NEXT: xorps %xmm2, %xmm2 +; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] ; SSE42-NEXT: retq ; ; AVX-LABEL: _clearupper4xi64c: Index: test/CodeGen/X86/coalesce_commute_movsd.ll =================================================================== --- test/CodeGen/X86/coalesce_commute_movsd.ll +++ test/CodeGen/X86/coalesce_commute_movsd.ll @@ -15,12 +15,12 @@ ; ; SSE41-LABEL: insert_f64: ; SSE41: # %bb.0: -; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE41-NEXT: retq ; ; AVX-LABEL: insert_f64: ; AVX: # %bb.0: -; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; AVX-NEXT: retq ; ; AVX512-LABEL: insert_f64: Index: test/CodeGen/X86/combine-and.ll =================================================================== --- test/CodeGen/X86/combine-and.ll +++ test/CodeGen/X86/combine-and.ll @@ -27,8 +27,8 @@ define <4 x i32> @test1(<4 x i32> %A) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; CHECK-NEXT: retq %1 = and <4 x i32> %A, ret <4 x i32> %1 @@ -37,8 +37,8 @@ define <4 x i32> @test2(<4 x i32> %A) { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7] +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] ; CHECK-NEXT: retq %1 = and <4 x i32> %A, ret <4 x i32> %1 @@ -47,8 +47,8 @@ define <4 x i32> @test3(<4 x i32> %A) { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3] ; CHECK-NEXT: retq %1 = and <4 x i32> %A, ret <4 x i32> %1 @@ -57,8 +57,8 @@ define <4 x i32> @test4(<4 x i32> %A) { ; 
CHECK-LABEL: test4: ; CHECK: # %bb.0: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] ; CHECK-NEXT: retq %1 = and <4 x i32> %A, ret <4 x i32> %1 @@ -67,8 +67,8 @@ define <4 x i32> @test5(<4 x i32> %A) { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; CHECK-NEXT: retq %1 = and <4 x i32> %A, ret <4 x i32> %1 @@ -77,8 +77,8 @@ define <4 x i32> @test6(<4 x i32> %A) { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3] ; CHECK-NEXT: retq %1 = and <4 x i32> %A, ret <4 x i32> %1 @@ -87,8 +87,8 @@ define <4 x i32> @test7(<4 x i32> %A) { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; CHECK-NEXT: retq %1 = and <4 x i32> %A, ret <4 x i32> %1 @@ -97,8 +97,8 @@ define <4 x i32> @test8(<4 x i32> %A) { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7] +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] ; CHECK-NEXT: retq %1 = and <4 x i32> %A, ret <4 x i32> %1 @@ -116,8 +116,8 @@ define <4 x i32> @test10(<4 x i32> %A) { ; CHECK-LABEL: test10: ; CHECK: # %bb.0: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7] +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3] ; CHECK-NEXT: retq %1 = and <4 x i32> %A, ret <4 x i32> %1 @@ -126,8 +126,8 @@ define <4 x i32> @test11(<4 x i32> %A) { ; CHECK-LABEL: test11: ; CHECK: # %bb.0: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7] +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] ; CHECK-NEXT: retq %1 = and <4 x i32> %A, ret <4 x i32> %1 @@ -136,8 +136,8 @@ define <4 x i32> @test12(<4 x i32> %A) { ; CHECK-LABEL: test12: ; CHECK: # %bb.0: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7] +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3] ; CHECK-NEXT: retq %1 = and <4 x i32> %A, ret <4 x i32> %1 @@ -146,8 +146,8 @@ define <4 x i32> @test13(<4 x i32> %A) { ; CHECK-LABEL: test13: ; CHECK: # %bb.0: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] ; CHECK-NEXT: retq %1 = and <4 x i32> %A, ret <4 x i32> %1 @@ -156,8 +156,8 @@ define <4 x i32> @test14(<4 x i32> %A) { ; CHECK-LABEL: test14: ; CHECK: # %bb.0: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] ; CHECK-NEXT: retq %1 = and <4 x i32> %A, ret <4 x i32> 
%1 @@ -166,7 +166,7 @@ define <4 x i32> @test15(<4 x i32> %A, <4 x i32> %B) { ; CHECK-LABEL: test15: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] ; CHECK-NEXT: retq %1 = and <4 x i32> %A, %2 = and <4 x i32> %B, @@ -177,7 +177,7 @@ define <4 x i32> @test16(<4 x i32> %A, <4 x i32> %B) { ; CHECK-LABEL: test16: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; CHECK-NEXT: retq %1 = and <4 x i32> %A, %2 = and <4 x i32> %B, @@ -188,7 +188,7 @@ define <4 x i32> @test17(<4 x i32> %A, <4 x i32> %B) { ; CHECK-LABEL: test17: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3] ; CHECK-NEXT: retq %1 = and <4 x i32> %A, %2 = and <4 x i32> %B, Index: test/CodeGen/X86/combine-or.ll =================================================================== --- test/CodeGen/X86/combine-or.ll +++ test/CodeGen/X86/combine-or.ll @@ -24,7 +24,7 @@ define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; CHECK-NEXT: retq %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> @@ -36,7 +36,7 @@ define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; CHECK-NEXT: retq %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> @@ -48,7 +48,7 @@ define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; CHECK-NEXT: retq %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32> @@ -60,7 +60,7 @@ define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; CHECK-NEXT: retq %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> @@ -72,7 +72,7 @@ define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] ; CHECK-NEXT: retq %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> @@ -84,7 +84,7 @@ define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; CHECK-NEXT: retq %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> %shuf2 = shufflevector <4 x i32> %b, 
<4 x i32> zeroinitializer, <4 x i32> @@ -96,7 +96,7 @@ define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; CHECK-NEXT: retq %and1 = and <4 x i32> %a, %and2 = and <4 x i32> %b, @@ -108,7 +108,7 @@ define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; CHECK-NEXT: retq %and1 = and <2 x i64> %a, %and2 = and <2 x i64> %b, @@ -120,7 +120,7 @@ define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test9: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; CHECK-NEXT: retq %and1 = and <4 x i32> %a, %and2 = and <4 x i32> %b, @@ -132,7 +132,7 @@ define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test10: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; CHECK-NEXT: retq %and1 = and <2 x i64> %a, %and2 = and <2 x i64> %b, @@ -144,7 +144,7 @@ define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test11: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; CHECK-NEXT: retq %and1 = and <4 x i32> %a, %and2 = and <4 x i32> %b, @@ -156,7 +156,7 @@ define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test12: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] ; CHECK-NEXT: retq %and1 = and <4 x i32> %a, %and2 = and <4 x i32> %b, @@ -299,7 +299,7 @@ define <2 x double> @test22(<2 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: test22: ; CHECK: # %bb.0: -; CHECK-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; CHECK-NEXT: retq %bc1 = bitcast <2 x double> %a0 to <2 x i64> %bc2 = bitcast <2 x double> %a1 to <2 x i64> @@ -329,7 +329,7 @@ define <4 x float> @test24(<4 x float> %a0, <4 x float> %a1) { ; CHECK-LABEL: test24: ; CHECK: # %bb.0: -; CHECK-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; CHECK-NEXT: retq %bc1 = bitcast <4 x float> %a0 to <2 x i64> %bc2 = bitcast <4 x float> %a1 to <2 x i64> @@ -362,7 +362,7 @@ define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) { ; CHECK-LABEL: test_crash: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; CHECK-NEXT: retq %shuf1 = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32> %shuf2 = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32> @@ -375,7 +375,7 @@ define <4 x i32> @test2b(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test2b: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; CHECK-NEXT: retq %shuf1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> @@ -386,7 +386,7 @@ define <4 x i32> @test2c(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test2c: ; CHECK: # %bb.0: -; CHECK-NEXT: 
pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; CHECK-NEXT: retq %shuf1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> %shuf2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32> @@ -398,7 +398,7 @@ define <4 x i32> @test2d(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test2d: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; CHECK-NEXT: retq %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> %shuf2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32> @@ -411,7 +411,7 @@ define <4 x i32> @test2e(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test2e: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; CHECK-NEXT: retq %shuf1 = shufflevector <4 x i32> %a, <4 x i32> , <4 x i32> %shuf2 = shufflevector <4 x i32> %b, <4 x i32> , <4 x i32> @@ -422,7 +422,7 @@ define <4 x i32> @test2f(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test2f: ; CHECK: # %bb.0: -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; CHECK-NEXT: retq %shuf1 = shufflevector <4 x i32> %a, <4 x i32> , <4 x i32> %shuf2 = shufflevector <4 x i32> %b, <4 x i32> , <4 x i32> Index: test/CodeGen/X86/combine-sra.ll =================================================================== --- test/CodeGen/X86/combine-sra.ll +++ test/CodeGen/X86/combine-sra.ll @@ -215,7 +215,7 @@ ; SSE-NEXT: movaps %xmm0, %xmm2 ; SSE-NEXT: movaps %xmm0, %xmm1 ; SSE-NEXT: psrad $2, %xmm1 -; SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] +; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7] ; SSE-NEXT: psrad $3, %xmm0 ; SSE-NEXT: psrad $1, %xmm2 ; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5,6,7] @@ -258,7 +258,7 @@ ; SSE-NEXT: movaps %xmm0, %xmm2 ; SSE-NEXT: movaps %xmm0, %xmm1 ; SSE-NEXT: psrad $2, %xmm1 -; SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] +; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7] ; SSE-NEXT: psrad $3, %xmm0 ; SSE-NEXT: psrad $1, %xmm2 ; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5,6,7] Index: test/CodeGen/X86/commute-blend-avx2.ll =================================================================== --- test/CodeGen/X86/commute-blend-avx2.ll +++ test/CodeGen/X86/commute-blend-avx2.ll @@ -70,7 +70,7 @@ define <2 x double> @commute_fold_vblendpd_128(<2 x double> %a, <2 x double>* %b) #0 { ; CHECK-LABEL: commute_fold_vblendpd_128: ; CHECK: # %bb.0: -; CHECK-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] +; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3] ; CHECK-NEXT: retq %1 = load <2 x double>, <2 x double>* %b %2 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %1, <2 x double> %a, i8 1) @@ -81,7 +81,7 @@ define <4 x double> @commute_fold_vblendpd_256(<4 x double> %a, <4 x double>* %b) #0 { ; CHECK-LABEL: commute_fold_vblendpd_256: ; CHECK: # %bb.0: -; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],mem[3] +; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],mem[6,7] ; CHECK-NEXT: retq %1 = load <4 x double>, <4 x double>* %b %2 = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %1, <4 x double> %a, i8 7) Index: test/CodeGen/X86/commute-blend-sse41.ll =================================================================== --- test/CodeGen/X86/commute-blend-sse41.ll +++ 
test/CodeGen/X86/commute-blend-sse41.ll @@ -26,7 +26,7 @@ define <2 x double> @commute_fold_blendpd(<2 x double> %a, <2 x double>* %b) #0 { ; CHECK-LABEL: commute_fold_blendpd: ; CHECK: # %bb.0: -; CHECK-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3] ; CHECK-NEXT: retq %1 = load <2 x double>, <2 x double>* %b %2 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %1, <2 x double> %a, i8 1) Index: test/CodeGen/X86/commuted-blend-mask.ll =================================================================== --- test/CodeGen/X86/commuted-blend-mask.ll +++ test/CodeGen/X86/commuted-blend-mask.ll @@ -10,5 +10,7 @@ define <4 x i32> @test(<4 x i32> %a, <4 x i32> %b) { ; CHECK: pblendw $63, %xmm1, %xmm0 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> - ret <4 x i32> %shuffle + ; add forces execution domain + %sum = add <4 x i32> %shuffle, %shuffle + ret <4 x i32> %sum } Index: test/CodeGen/X86/cvtv2f32.ll =================================================================== --- test/CodeGen/X86/cvtv2f32.ll +++ test/CodeGen/X86/cvtv2f32.ll @@ -72,10 +72,10 @@ define <2 x float> @uitofp_2i32_legalized(<2 x i32> %in, <2 x float> %v) { ; X32-LABEL: uitofp_2i32_legalized: ; X32: # %bb.0: -; X32-NEXT: pxor %xmm2, %xmm2 -; X32-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] -; X32-NEXT: movdqa {{.*#+}} xmm0 = [4.503600e+15,4.503600e+15] -; X32-NEXT: por %xmm0, %xmm2 +; X32-NEXT: xorps %xmm2, %xmm2 +; X32-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; X32-NEXT: movaps {{.*#+}} xmm0 = [4.503600e+15,4.503600e+15] +; X32-NEXT: orps %xmm0, %xmm2 ; X32-NEXT: subpd %xmm0, %xmm2 ; X32-NEXT: cvtpd2ps %xmm2, %xmm0 ; X32-NEXT: mulps %xmm1, %xmm0 @@ -83,10 +83,10 @@ ; ; X64-LABEL: uitofp_2i32_legalized: ; X64: # %bb.0: -; X64-NEXT: pxor %xmm2, %xmm2 -; X64-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] -; X64-NEXT: movdqa {{.*#+}} xmm0 = [4.503600e+15,4.503600e+15] -; X64-NEXT: por %xmm0, %xmm2 +; X64-NEXT: xorps %xmm2, %xmm2 +; X64-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; X64-NEXT: movaps {{.*#+}} xmm0 = [4.503600e+15,4.503600e+15] +; X64-NEXT: orps %xmm0, %xmm2 ; X64-NEXT: subpd %xmm0, %xmm2 ; X64-NEXT: cvtpd2ps %xmm2, %xmm0 ; X64-NEXT: mulps %xmm1, %xmm0 Index: test/CodeGen/X86/insert-into-constant-vector.ll =================================================================== --- test/CodeGen/X86/insert-into-constant-vector.ll +++ test/CodeGen/X86/insert-into-constant-vector.ll @@ -440,9 +440,9 @@ ; ; X64AVX2-LABEL: elt1_v8f64: ; X64AVX2: # %bb.0: -; X64AVX2-NEXT: vmovapd {{.*#+}} ymm1 = <42,u,2,3> -; X64AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; X64AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] +; X64AVX2-NEXT: vmovaps {{.*#+}} ymm1 = <42,u,2,3> +; X64AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; X64AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; X64AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00] ; X64AVX2-NEXT: retq ; Index: test/CodeGen/X86/insertelement-ones.ll =================================================================== --- test/CodeGen/X86/insertelement-ones.ll +++ test/CodeGen/X86/insertelement-ones.ll @@ -77,7 +77,7 @@ ; AVX1: # %bb.0: ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3] +; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7] ; 
AVX1-NEXT: retq ; ; AVX2-LABEL: insert_v4i64_01x3: Index: test/CodeGen/X86/insertelement-zero.ll =================================================================== --- test/CodeGen/X86/insertelement-zero.ll +++ test/CodeGen/X86/insertelement-zero.ll @@ -28,14 +28,14 @@ ; ; SSE41-LABEL: insert_v2f64_z1: ; SSE41: # %bb.0: -; SSE41-NEXT: xorpd %xmm1, %xmm1 -; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE41-NEXT: xorps %xmm1, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; SSE41-NEXT: retq ; ; AVX-LABEL: insert_v2f64_z1: ; AVX: # %bb.0: -; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; AVX-NEXT: retq %1 = insertelement <2 x double> %a, double 0.0, i32 0 ret <2 x double> %1 @@ -66,14 +66,14 @@ ; SSE41-LABEL: insert_v4f64_0zz3: ; SSE41: # %bb.0: ; SSE41-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero -; SSE41-NEXT: xorpd %xmm2, %xmm2 -; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1] +; SSE41-NEXT: xorps %xmm2, %xmm2 +; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3] ; SSE41-NEXT: retq ; ; AVX-LABEL: insert_v4f64_0zz3: ; AVX: # %bb.0: -; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7] ; AVX-NEXT: retq %1 = insertelement <4 x double> %a, double 0.0, i32 1 %2 = insertelement <4 x double> %1, double 0.0, i32 2 @@ -101,21 +101,15 @@ ; ; SSE41-LABEL: insert_v2i64_z1: ; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; SSE41-NEXT: xorps %xmm1, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: insert_v2i64_z1: -; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] -; AVX1-NEXT: retq -; -; AVX2-LABEL: insert_v2i64_z1: -; AVX2: # %bb.0: -; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; AVX2-NEXT: retq +; AVX-LABEL: insert_v2i64_z1: +; AVX: # %bb.0: +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] +; AVX-NEXT: retq %1 = insertelement <2 x i64> %a, i64 0, i32 0 ret <2 x i64> %1 } @@ -141,21 +135,15 @@ ; ; SSE41-LABEL: insert_v4i64_01z3: ; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] +; SSE41-NEXT: xorps %xmm2, %xmm2 +; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: insert_v4i64_01z3: -; AVX1: # %bb.0: -; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3] -; AVX1-NEXT: retq -; -; AVX2-LABEL: insert_v4i64_01z3: -; AVX2: # %bb.0: -; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7] -; AVX2-NEXT: retq +; AVX-LABEL: insert_v4i64_01z3: +; AVX: # %bb.0: +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7] +; AVX-NEXT: retq %1 = insertelement <4 x i64> %a, i64 0, i32 2 ret <4 x i64> %1 } @@ -263,21 +251,15 @@ ; ; SSE41-LABEL: insert_v4i32_01z3: ; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] +; SSE41-NEXT: xorps %xmm1, %xmm1 +; 
SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: insert_v4i32_01z3: -; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] -; AVX1-NEXT: retq -; -; AVX2-LABEL: insert_v4i32_01z3: -; AVX2: # %bb.0: -; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] -; AVX2-NEXT: retq +; AVX-LABEL: insert_v4i32_01z3: +; AVX: # %bb.0: +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] +; AVX-NEXT: retq %1 = insertelement <4 x i32> %a, i32 0, i32 2 ret <4 x i32> %1 } @@ -312,9 +294,9 @@ ; ; SSE41-LABEL: insert_v8i32_z12345z7: ; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7] -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7] +; SSE41-NEXT: xorps %xmm2, %xmm2 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] +; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3] ; SSE41-NEXT: retq ; ; AVX-LABEL: insert_v8i32_z12345z7: Index: test/CodeGen/X86/masked_memop.ll =================================================================== --- test/CodeGen/X86/masked_memop.ll +++ test/CodeGen/X86/masked_memop.ll @@ -835,7 +835,7 @@ ; AVX1: ## %bb.0: ; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [0,4294967295,4294967295,4294967295] ; AVX1-NEXT: vmaskmovps (%rdi), %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; AVX1-NEXT: retq ; ; AVX2-LABEL: mload_constmask_v4i32: @@ -963,15 +963,10 @@ } define <4 x i64> @mload_constmask_v4i64(<4 x i64>* %addr, <4 x i64> %dst) { -; AVX1-LABEL: mload_constmask_v4i64: -; AVX1: ## %bb.0: -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = mem[0],ymm0[1,2],mem[3] -; AVX1-NEXT: retq -; -; AVX2-LABEL: mload_constmask_v4i64: -; AVX2: ## %bb.0: -; AVX2-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1],ymm0[2,3,4,5],mem[6,7] -; AVX2-NEXT: retq +; AVX-LABEL: mload_constmask_v4i64: +; AVX: ## %bb.0: +; AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1],ymm0[2,3,4,5],mem[6,7] +; AVX-NEXT: retq ; ; AVX512F-LABEL: mload_constmask_v4i64: ; AVX512F: ## %bb.0: @@ -997,8 +992,8 @@ define <8 x double> @mload_constmask_v8f64(<8 x double>* %addr, <8 x double> %dst) { ; AVX-LABEL: mload_constmask_v8f64: ; AVX: ## %bb.0: -; AVX-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0,1,2],mem[3] -; AVX-NEXT: vblendpd {{.*#+}} ymm0 = mem[0,1,2],ymm0[3] +; AVX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],mem[6,7] +; AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6,7] ; AVX-NEXT: retq ; ; AVX512F-LABEL: mload_constmask_v8f64: Index: test/CodeGen/X86/merge-consecutive-loads-256.ll =================================================================== --- test/CodeGen/X86/merge-consecutive-loads-256.ll +++ test/CodeGen/X86/merge-consecutive-loads-256.ll @@ -129,15 +129,15 @@ define <4 x double> @merge_4f64_f64_34z6(double* %ptr) nounwind uwtable noinline ssp { ; AVX-LABEL: merge_4f64_f64_34z6: ; AVX: # %bb.0: -; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendpd {{.*#+}} ymm0 = mem[0,1],ymm0[2],mem[3] +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3],ymm0[4,5],mem[6,7] ; AVX-NEXT: retq ; ; X32-AVX-LABEL: merge_4f64_f64_34z6: ; X32-AVX: # %bb.0: ; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; X32-AVX-NEXT: vblendpd {{.*#+}} ymm0 = 
mem[0,1],ymm0[2],mem[3] +; X32-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X32-AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3],ymm0[4,5],mem[6,7] ; X32-AVX-NEXT: retl %ptr0 = getelementptr inbounds double, double* %ptr, i64 3 %ptr1 = getelementptr inbounds double, double* %ptr, i64 4 @@ -262,8 +262,8 @@ ; X32-AVX-LABEL: merge_8f32_2f32_23z5: ; X32-AVX: # %bb.0: ; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; X32-AVX-NEXT: vblendpd {{.*#+}} ymm0 = mem[0,1],ymm0[2],mem[3] +; X32-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X32-AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3],ymm0[4,5],mem[6,7] ; X32-AVX-NEXT: retl %ptr0 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 2 %ptr1 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 3 Index: test/CodeGen/X86/oddshuffles.ll =================================================================== --- test/CodeGen/X86/oddshuffles.ll +++ test/CodeGen/X86/oddshuffles.ll @@ -105,10 +105,10 @@ ; ; AVX1-LABEL: v3i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] -; AVX1-NEXT: vpextrd $2, %xmm0, 8(%rdi) -; AVX1-NEXT: vmovq %xmm1, (%rdi) +; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1] +; AVX1-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3] +; AVX1-NEXT: vextractps $2, %xmm0, 8(%rdi) +; AVX1-NEXT: vmovlps %xmm1, (%rdi) ; AVX1-NEXT: retq ; ; AVX2-LABEL: v3i32: @@ -121,10 +121,10 @@ ; ; XOP-LABEL: v3i32: ; XOP: # %bb.0: -; XOP-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] -; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] -; XOP-NEXT: vpextrd $2, %xmm0, 8(%rdi) -; XOP-NEXT: vmovq %xmm1, (%rdi) +; XOP-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1] +; XOP-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3] +; XOP-NEXT: vextractps $2, %xmm0, 8(%rdi) +; XOP-NEXT: vmovlps %xmm1, (%rdi) ; XOP-NEXT: retq %r = shufflevector <2 x i32> %a, <2 x i32> %b, <3 x i32> store <3 x i32> %r, <3 x i32>* %p @@ -665,38 +665,38 @@ ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],xmm3[3,3] ; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,1] ; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3] -; AVX1-NEXT: vmovapd %xmm0, 32(%rdi) +; AVX1-NEXT: vmovaps %xmm0, 32(%rdi) ; AVX1-NEXT: vmovaps %ymm2, (%rdi) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-SLOW-LABEL: v12i32: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[2,3,2,3] -; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm3 = ymm0[3,3,2,3,7,7,6,7] -; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,2,2,3] -; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3] -; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm3 = <0,4,u,1,5,u,2,6> -; AVX2-SLOW-NEXT: vpermps %ymm0, %ymm3, %ymm0 -; AVX2-SLOW-NEXT: vbroadcastsd %xmm1, %ymm1 -; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7] -; AVX2-SLOW-NEXT: vmovaps %ymm0, (%rdi) -; AVX2-SLOW-NEXT: vmovaps %xmm2, 32(%rdi) +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[3,3,2,3,7,7,6,7] +; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3] +; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = <0,4,u,1,5,u,2,6> +; AVX2-SLOW-NEXT: vpermd %ymm0, %ymm3, %ymm0 +; AVX2-SLOW-NEXT: vpbroadcastq %xmm1, %ymm1 +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7] +; AVX2-SLOW-NEXT: vmovdqa %ymm0, (%rdi) +; 
AVX2-SLOW-NEXT: vmovdqa %xmm2, 32(%rdi) ; AVX2-SLOW-NEXT: vzeroupper ; AVX2-SLOW-NEXT: retq ; ; AVX2-FAST-LABEL: v12i32: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm2 = <0,4,u,1,5,u,2,6> -; AVX2-FAST-NEXT: vpermps %ymm0, %ymm2, %ymm2 -; AVX2-FAST-NEXT: vbroadcastsd %xmm1, %ymm3 -; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4],ymm3[5],ymm2[6,7] -; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm3 = [3,3,7,7,7,7,6,7] -; AVX2-FAST-NEXT: vpermps %ymm0, %ymm3, %ymm0 -; AVX2-FAST-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,2,3] -; AVX2-FAST-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3] -; AVX2-FAST-NEXT: vmovaps %xmm0, 32(%rdi) -; AVX2-FAST-NEXT: vmovaps %ymm2, (%rdi) +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <0,4,u,1,5,u,2,6> +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm2 +; AVX2-FAST-NEXT: vpbroadcastq %xmm1, %ymm3 +; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4],ymm3[5],ymm2[6,7] +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [3,3,7,7,7,7,6,7] +; AVX2-FAST-NEXT: vpermd %ymm0, %ymm3, %ymm0 +; AVX2-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; AVX2-FAST-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3] +; AVX2-FAST-NEXT: vmovdqa %xmm0, 32(%rdi) +; AVX2-FAST-NEXT: vmovdqa %ymm2, (%rdi) ; AVX2-FAST-NEXT: vzeroupper ; AVX2-FAST-NEXT: retq ; @@ -711,7 +711,7 @@ ; XOP-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],xmm3[3,3] ; XOP-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,1] ; XOP-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3] -; XOP-NEXT: vmovapd %xmm0, 32(%rdi) +; XOP-NEXT: vmovaps %xmm0, 32(%rdi) ; XOP-NEXT: vmovaps %ymm2, (%rdi) ; XOP-NEXT: vzeroupper ; XOP-NEXT: retq @@ -1381,7 +1381,7 @@ ; SSE42-NEXT: pblendw {{.*#+}} xmm7 = xmm7[0,1],xmm4[2,3],xmm7[4,5,6,7] ; SSE42-NEXT: pshufd {{.*#+}} xmm6 = xmm4[2,3,0,1] ; SSE42-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,3],xmm2[2,3] -; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5],xmm8[6,7] +; SSE42-NEXT: blendps {{.*#+}} xmm4 = xmm4[0,1,2],xmm8[3] ; SSE42-NEXT: movdqa %xmm10, %xmm1 ; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm5[2,3],xmm1[4,5,6,7] ; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,0,1] @@ -1401,7 +1401,7 @@ ; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm9[0,1,0,3] ; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1,2,3],xmm2[4,5,6,7] ; SSE42-NEXT: movdqu %xmm3, 16(%rsi) -; SSE42-NEXT: movdqu %xmm4, (%rsi) +; SSE42-NEXT: movups %xmm4, (%rsi) ; SSE42-NEXT: movdqu %xmm5, 16(%rdx) ; SSE42-NEXT: movdqu %xmm7, (%rdx) ; SSE42-NEXT: movdqu %xmm2, 16(%rcx) @@ -1422,7 +1422,7 @@ ; AVX1-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[0,3,2,1] ; AVX1-NEXT: vpermilps {{.*#+}} xmm6 = xmm6[0,3,2,3] ; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5 -; AVX1-NEXT: vblendpd {{.*#+}} ymm4 = ymm5[0,1,2],ymm4[3] +; AVX1-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0,1,2,3,4,5],ymm4[6,7] ; AVX1-NEXT: vblendps {{.*#+}} xmm5 = xmm2[0,1],xmm3[2],xmm2[3] ; AVX1-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[0,0,3,2] ; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm5 @@ -1442,7 +1442,7 @@ ; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm2[5,6,7] -; AVX1-NEXT: vmovupd %ymm4, (%rsi) +; AVX1-NEXT: vmovups %ymm4, (%rsi) ; AVX1-NEXT: vmovups %ymm5, (%rdx) ; AVX1-NEXT: vmovups %ymm0, (%rcx) ; AVX1-NEXT: vzeroupper @@ -1520,7 +1520,7 @@ ; XOP-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[0,3,2,1] ; XOP-NEXT: vpermilps {{.*#+}} xmm6 = xmm6[0,3,2,3] ; XOP-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5 -; XOP-NEXT: vblendpd {{.*#+}} ymm4 = ymm5[0,1,2],ymm4[3] +; 
XOP-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0,1,2,3,4,5],ymm4[6,7] ; XOP-NEXT: vblendps {{.*#+}} xmm5 = xmm2[0,1],xmm3[2],xmm2[3] ; XOP-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[0,0,3,2] ; XOP-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm5 @@ -1540,7 +1540,7 @@ ; XOP-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] ; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; XOP-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm2[5,6,7] -; XOP-NEXT: vmovupd %ymm4, (%rsi) +; XOP-NEXT: vmovups %ymm4, (%rsi) ; XOP-NEXT: vmovups %ymm5, (%rdx) ; XOP-NEXT: vmovups %ymm0, (%rcx) ; XOP-NEXT: vzeroupper @@ -1674,8 +1674,8 @@ ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3],ymm0[4],ymm2[5,6],ymm0[7] ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,3,3,4,4,7,7] ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7] -; AVX1-NEXT: vmovupd %ymm0, 32(%rdi) -; AVX1-NEXT: vmovupd %ymm4, 64(%rdi) +; AVX1-NEXT: vmovups %ymm0, 32(%rdi) +; AVX1-NEXT: vmovups %ymm4, 64(%rdi) ; AVX1-NEXT: vmovups %ymm3, (%rdi) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1763,7 +1763,7 @@ ; XOP-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,3,3,4,4,7,7] ; XOP-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7] ; XOP-NEXT: vmovups %ymm0, 32(%rdi) -; XOP-NEXT: vmovupd %ymm4, 64(%rdi) +; XOP-NEXT: vmovups %ymm4, 64(%rdi) ; XOP-NEXT: vmovups %ymm3, (%rdi) ; XOP-NEXT: vzeroupper ; XOP-NEXT: retq Index: test/CodeGen/X86/pr31956.ll =================================================================== --- test/CodeGen/X86/pr31956.ll +++ test/CodeGen/X86/pr31956.ll @@ -10,7 +10,7 @@ ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2,3] +; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2,3,4,5,6,7] ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,0] ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3] Index: test/CodeGen/X86/split-extend-vector-inreg.ll =================================================================== --- test/CodeGen/X86/split-extend-vector-inreg.ll +++ test/CodeGen/X86/split-extend-vector-inreg.ll @@ -7,8 +7,8 @@ ; X32: # %bb.0: # %BB ; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1] ; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3] +; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5],ymm1[6,7] ; X32-NEXT: movb $1, %al ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB0_1: # %CF @@ -22,8 +22,8 @@ ; X64: # %bb.0: # %BB ; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1] ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3] +; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5],ymm1[6,7] ; X64-NEXT: movb $1, %al ; X64-NEXT: .p2align 4, 0x90 ; X64-NEXT: .LBB0_1: # %CF Index: test/CodeGen/X86/sse-scalar-fp-arith.ll =================================================================== --- test/CodeGen/X86/sse-scalar-fp-arith.ll +++ test/CodeGen/X86/sse-scalar-fp-arith.ll @@ -1152,8 +1152,8 @@ ; SSE41-NEXT: testb $1, %dil ; SSE41-NEXT: jne .LBB63_1 ; SSE41-NEXT: # %bb.2: -; SSE41-NEXT: movapd %xmm2, %xmm1 -; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE41-NEXT: movaps %xmm2, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; 
SSE41-NEXT: retq ; SSE41-NEXT: .LBB63_1: ; SSE41-NEXT: addsd %xmm0, %xmm1 Index: test/CodeGen/X86/sse41-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/sse41-intrinsics-fast-isel.ll +++ test/CodeGen/X86/sse41-intrinsics-fast-isel.ll @@ -24,12 +24,12 @@ define <2 x double> @test_mm_blend_pd(<2 x double> %a0, <2 x double> %a1) { ; X32-LABEL: test_mm_blend_pd: ; X32: # %bb.0: -; X32-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; X32-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_blend_pd: ; X64: # %bb.0: -; X64-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; X64-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; X64-NEXT: retq %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> ret <2 x double> %res Index: test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll =================================================================== --- test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll +++ test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll @@ -7,7 +7,7 @@ define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) { ; CHECK-LABEL: test_x86_sse41_blendpd: ; CHECK: ## %bb.0: -; CHECK-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; CHECK-NEXT: retl %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 6) ; <<2 x double>> [#uses=1] ret <2 x double> %res Index: test/CodeGen/X86/sse41.ll =================================================================== --- test/CodeGen/X86/sse41.ll +++ test/CodeGen/X86/sse41.ll @@ -564,14 +564,14 @@ define <4 x i32> @i32_shuf_XYZ0(<4 x i32> %x, <4 x i32> %a) { ; X32-LABEL: i32_shuf_XYZ0: ; X32: ## %bb.0: -; X32-NEXT: pxor %xmm1, %xmm1 -; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7] +; X32-NEXT: xorps %xmm1, %xmm1 +; X32-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3] ; X32-NEXT: retl ; ; X64-LABEL: i32_shuf_XYZ0: ; X64: ## %bb.0: -; X64-NEXT: pxor %xmm1, %xmm1 -; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7] +; X64-NEXT: xorps %xmm1, %xmm1 +; X64-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3] ; X64-NEXT: retq %vecext = extractelement <4 x i32> %x, i32 0 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 Index: test/CodeGen/X86/vec_extract-avx.ll =================================================================== --- test/CodeGen/X86/vec_extract-avx.ll +++ test/CodeGen/X86/vec_extract-avx.ll @@ -144,19 +144,19 @@ ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: vmovupd (%ecx), %xmm0 -; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] -; X32-NEXT: vmovapd %ymm0, (%eax) +; X32-NEXT: vmovups (%ecx), %xmm0 +; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7] +; X32-NEXT: vmovaps %ymm0, (%eax) ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; ; X64-LABEL: legal_vzmovl_2i64_4i64: ; X64: # %bb.0: -; X64-NEXT: vmovupd (%rdi), %xmm0 -; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] -; X64-NEXT: vmovapd %ymm0, (%rsi) +; X64-NEXT: vmovups (%rdi), %xmm0 +; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7] +; X64-NEXT: vmovaps %ymm0, (%rsi) ; X64-NEXT: vzeroupper ; X64-NEXT: retq %ld = load <2 x i64>, <2 x i64>* %in, align 8 @@ -196,19 +196,19 @@ ; X32: # 
%bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: vmovupd (%ecx), %xmm0 -; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] -; X32-NEXT: vmovapd %ymm0, (%eax) +; X32-NEXT: vmovups (%ecx), %xmm0 +; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7] +; X32-NEXT: vmovaps %ymm0, (%eax) ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; ; X64-LABEL: legal_vzmovl_2f64_4f64: ; X64: # %bb.0: -; X64-NEXT: vmovupd (%rdi), %xmm0 -; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] -; X64-NEXT: vmovapd %ymm0, (%rsi) +; X64-NEXT: vmovups (%rdi), %xmm0 +; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7] +; X64-NEXT: vmovaps %ymm0, (%rsi) ; X64-NEXT: vzeroupper ; X64-NEXT: retq %ld = load <2 x double>, <2 x double>* %in, align 8 Index: test/CodeGen/X86/vector-blend.ll =================================================================== --- test/CodeGen/X86/vector-blend.ll +++ test/CodeGen/X86/vector-blend.ll @@ -76,18 +76,13 @@ ; ; SSE41-LABEL: vsel_4xi8: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: vsel_4xi8: -; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] -; AVX1-NEXT: retq -; -; AVX2-LABEL: vsel_4xi8: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] -; AVX2-NEXT: retq +; AVX-LABEL: vsel_4xi8: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] +; AVX-NEXT: retq entry: %vsel = select <4 x i1> , <4 x i8> %v1, <4 x i8> %v2 ret <4 x i8> %vsel @@ -110,18 +105,13 @@ ; ; SSE41-LABEL: vsel_4xi16: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: vsel_4xi16: -; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] -; AVX1-NEXT: retq -; -; AVX2-LABEL: vsel_4xi16: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] -; AVX2-NEXT: retq +; AVX-LABEL: vsel_4xi16: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] +; AVX-NEXT: retq entry: %vsel = select <4 x i1> , <4 x i16> %v1, <4 x i16> %v2 ret <4 x i16> %vsel @@ -144,18 +134,13 @@ ; ; SSE41-LABEL: vsel_i32: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: vsel_i32: -; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] -; AVX1-NEXT: retq -; -; AVX2-LABEL: vsel_i32: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX2-NEXT: retq +; AVX-LABEL: vsel_i32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; AVX-NEXT: retq entry: %vsel = select <4 x i1> , <4 x i32> %v1, <4 x i32> %v2 ret <4 x i32> %vsel @@ -176,12 +161,12 @@ ; ; SSE41-LABEL: vsel_double: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = 
xmm0[0,1],xmm1[2,3] ; SSE41-NEXT: retq ; ; AVX-LABEL: vsel_double: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; AVX-NEXT: retq entry: %vsel = select <2 x i1> , <2 x double> %v1, <2 x double> %v2 @@ -203,18 +188,13 @@ ; ; SSE41-LABEL: vsel_i64: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: vsel_i64: -; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] -; AVX1-NEXT: retq -; -; AVX2-LABEL: vsel_i64: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] -; AVX2-NEXT: retq +; AVX-LABEL: vsel_i64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; AVX-NEXT: retq entry: %vsel = select <2 x i1> , <2 x i64> %v1, <2 x i64> %v2 ret <2 x i64> %vsel @@ -342,8 +322,8 @@ ; ; SSE41-LABEL: vsel_i328: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7] -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3,4,5,6,7] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] +; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3] ; SSE41-NEXT: retq ; ; AVX-LABEL: vsel_i328: @@ -378,16 +358,16 @@ ; ; SSE41-LABEL: vsel_double8: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm4[1] -; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm6[1] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3] +; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3] ; SSE41-NEXT: movaps %xmm5, %xmm1 ; SSE41-NEXT: movaps %xmm7, %xmm3 ; SSE41-NEXT: retq ; ; AVX-LABEL: vsel_double8: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3] -; AVX-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0],ymm3[1,2,3] +; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7] +; AVX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7] ; AVX-NEXT: retq entry: %vsel = select <8 x i1> , <8 x double> %v1, <8 x double> %v2 @@ -417,23 +397,17 @@ ; ; SSE41-LABEL: vsel_i648: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7] -; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3] +; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3] ; SSE41-NEXT: movaps %xmm5, %xmm1 ; SSE41-NEXT: movaps %xmm7, %xmm3 ; SSE41-NEXT: retq ; -; AVX1-LABEL: vsel_i648: -; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3] -; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0],ymm3[1,2,3] -; AVX1-NEXT: retq -; -; AVX2-LABEL: vsel_i648: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7] -; AVX2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7] -; AVX2-NEXT: retq +; AVX-LABEL: vsel_i648: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7] +; AVX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7] +; AVX-NEXT: retq entry: %vsel = select <8 x i1> , <8 x i64> %v1, <8 x i64> %v2 ret <8 x i64> %vsel @@ -458,13 +432,13 @@ ; ; SSE41-LABEL: vsel_double4: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1] -; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm3[1] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] +; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3] ; 
SSE41-NEXT: retq ; ; AVX-LABEL: vsel_double4: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3] +; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] ; AVX-NEXT: retq entry: %vsel = select <4 x i1> , <4 x double> %v1, <4 x double> %v2 @@ -568,13 +542,13 @@ ; ; SSE41-LABEL: constant_blendvpd_avx: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm3[1] +; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3] ; SSE41-NEXT: movaps %xmm2, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: constant_blendvpd_avx: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3] +; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5],ymm1[6,7] ; AVX-NEXT: retq entry: %select = select <4 x i1> , <4 x double> %xy, <4 x double> %ab @@ -752,12 +726,12 @@ ; ; SSE41-LABEL: blend_shufflevector_4xdouble: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] ; SSE41-NEXT: retq ; ; AVX-LABEL: blend_shufflevector_4xdouble: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3] +; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7] ; AVX-NEXT: retq entry: %select = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> @@ -779,19 +753,14 @@ ; ; SSE41-LABEL: blend_shufflevector_4xi64: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3] ; SSE41-NEXT: movaps %xmm3, %xmm1 ; SSE41-NEXT: retq ; -; AVX1-LABEL: blend_shufflevector_4xi64: -; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3] -; AVX1-NEXT: retq -; -; AVX2-LABEL: blend_shufflevector_4xi64: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5,6,7] -; AVX2-NEXT: retq +; AVX-LABEL: blend_shufflevector_4xi64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5,6,7] +; AVX-NEXT: retq entry: %select = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> ret <4 x i64> %select Index: test/CodeGen/X86/vector-extend-inreg.ll =================================================================== --- test/CodeGen/X86/vector-extend-inreg.ll +++ test/CodeGen/X86/vector-extend-inreg.ll @@ -72,16 +72,16 @@ ; X32-AVX-NEXT: subl $384, %esp # imm = 0x180 ; X32-AVX-NEXT: movl 40(%ebp), %ecx ; X32-AVX-NEXT: vbroadcastsd 32(%ebp), %ymm0 -; X32-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; X32-AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] -; X32-AVX-NEXT: vmovapd %ymm1, {{[0-9]+}}(%esp) -; X32-AVX-NEXT: vmovapd %ymm1, {{[0-9]+}}(%esp) -; X32-AVX-NEXT: vmovapd %ymm1, {{[0-9]+}}(%esp) -; X32-AVX-NEXT: vmovapd %ymm0, {{[0-9]+}}(%esp) -; X32-AVX-NEXT: vmovapd %ymm1, {{[0-9]+}}(%esp) -; X32-AVX-NEXT: vmovapd %ymm1, {{[0-9]+}}(%esp) -; X32-AVX-NEXT: vmovapd %ymm1, (%esp) -; X32-AVX-NEXT: vmovapd %ymm0, {{[0-9]+}}(%esp) +; X32-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X32-AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7] +; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp) +; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp) +; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp) +; X32-AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) +; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp) +; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp) +; X32-AVX-NEXT: vmovaps %ymm1, (%esp) +; X32-AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) ; 
X32-AVX-NEXT: leal (%ecx,%ecx), %eax ; X32-AVX-NEXT: andl $31, %eax ; X32-AVX-NEXT: movl 128(%esp,%eax,4), %eax @@ -101,12 +101,12 @@ ; X64-AVX-NEXT: subq $256, %rsp # imm = 0x100 ; X64-AVX-NEXT: # kill: def %edi killed %edi def %rdi ; X64-AVX-NEXT: vpermpd {{.*#+}} ymm0 = ymm3[3,1,2,3] -; X64-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; X64-AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] -; X64-AVX-NEXT: vmovapd %ymm1, {{[0-9]+}}(%rsp) -; X64-AVX-NEXT: vmovapd %ymm1, {{[0-9]+}}(%rsp) -; X64-AVX-NEXT: vmovapd %ymm1, (%rsp) -; X64-AVX-NEXT: vmovapd %ymm0, {{[0-9]+}}(%rsp) +; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7] +; X64-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) +; X64-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) +; X64-AVX-NEXT: vmovaps %ymm1, (%rsp) +; X64-AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) ; X64-AVX-NEXT: andl $15, %edi ; X64-AVX-NEXT: movq (%rsp,%rdi,8), %rax ; X64-AVX-NEXT: movq %rbp, %rsp Index: test/CodeGen/X86/vector-shuffle-128-v2.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-128-v2.ll +++ test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -240,17 +240,17 @@ ; ; SSE41-LABEL: shuffle_v2f64_03: ; SSE41: # %bb.0: -; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE41-NEXT: retq ; ; AVX1-LABEL: shuffle_v2f64_03: ; AVX1: # %bb.0: -; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuffle_v2f64_03: ; AVX2: # %bb.0: -; AVX2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v2f64_03: @@ -278,17 +278,17 @@ ; ; SSE41-LABEL: shuffle_v2f64_21: ; SSE41: # %bb.0: -; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; SSE41-NEXT: retq ; ; AVX1-LABEL: shuffle_v2f64_21: ; AVX1: # %bb.0: -; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuffle_v2f64_21: ; AVX2: # %bb.0: -; AVX2-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v2f64_21: @@ -376,23 +376,13 @@ ; ; SSE41-LABEL: shuffle_v2i64_03: ; SSE41: # %bb.0: -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: shuffle_v2i64_03: -; AVX1: # %bb.0: -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v2i64_03: -; AVX2: # %bb.0: -; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v2i64_03: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] -; AVX512VL-NEXT: retq +; AVX-LABEL: shuffle_v2i64_03: +; AVX: # %bb.0: +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } @@ -417,24 +407,14 @@ ; ; SSE41-LABEL: shuffle_v2i64_03_copy: ; SSE41: # %bb.0: -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3] +; SSE41-NEXT: movaps %xmm1, %xmm0 ; 
SSE41-NEXT: retq ; -; AVX1-LABEL: shuffle_v2i64_03_copy: -; AVX1: # %bb.0: -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v2i64_03_copy: -; AVX2: # %bb.0: -; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v2i64_03_copy: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3] -; AVX512VL-NEXT: retq +; AVX-LABEL: shuffle_v2i64_03_copy: +; AVX: # %bb.0: +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } @@ -573,23 +553,13 @@ ; ; SSE41-LABEL: shuffle_v2i64_21: ; SSE41: # %bb.0: -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: shuffle_v2i64_21: -; AVX1: # %bb.0: -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v2i64_21: -; AVX2: # %bb.0: -; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v2i64_21: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; AVX512VL-NEXT: retq +; AVX-LABEL: shuffle_v2i64_21: +; AVX: # %bb.0: +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } @@ -614,24 +584,14 @@ ; ; SSE41-LABEL: shuffle_v2i64_21_copy: ; SSE41: # %bb.0: -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3] +; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: shuffle_v2i64_21_copy: -; AVX1: # %bb.0: -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v2i64_21_copy: -; AVX2: # %bb.0: -; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v2i64_21_copy: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3] -; AVX512VL-NEXT: retq +; AVX-LABEL: shuffle_v2i64_21_copy: +; AVX: # %bb.0: +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3] +; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle } @@ -789,14 +749,14 @@ ; ; SSE41-LABEL: shuffle_v2i64_z1: ; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; SSE41-NEXT: xorps %xmm1, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; SSE41-NEXT: retq ; ; AVX1-LABEL: shuffle_v2i64_z1: ; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuffle_v2i64_z1: @@ -906,26 +866,26 @@ ; ; SSE41-LABEL: shuffle_v2f64_z1: ; SSE41: # %bb.0: -; SSE41-NEXT: xorpd %xmm1, %xmm1 -; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE41-NEXT: xorps %xmm1, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; SSE41-NEXT: retq ; ; AVX1-LABEL: shuffle_v2f64_z1: ; AVX1: # %bb.0: -; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vblendps 
{{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuffle_v2f64_z1: ; AVX2: # %bb.0: -; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v2f64_z1: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VL-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; AVX512VL-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> ret <2 x double> %shuffle @@ -980,14 +940,14 @@ ; ; SSE41-LABEL: shuffle_v2i64_bitcast_z123: ; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7] +; SSE41-NEXT: xorps %xmm1, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] ; SSE41-NEXT: retq ; ; AVX1-LABEL: shuffle_v2i64_bitcast_z123: ; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7] +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuffle_v2i64_bitcast_z123: @@ -1222,17 +1182,17 @@ ; ; SSE41-LABEL: insert_reg_lo_v2f64: ; SSE41: # %bb.0: -; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE41-NEXT: retq ; ; AVX1-LABEL: insert_reg_lo_v2f64: ; AVX1: # %bb.0: -; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; AVX1-NEXT: retq ; ; AVX2-LABEL: insert_reg_lo_v2f64: ; AVX2: # %bb.0: -; AVX2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: insert_reg_lo_v2f64: Index: test/CodeGen/X86/vector-shuffle-128-v4.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-128-v4.ll +++ test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -356,8 +356,8 @@ ; ; AVX1-LABEL: shuffle_v4i32_0124: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7] +; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,1,2,0] +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3] ; AVX1-NEXT: retq ; ; AVX2OR512VL-LABEL: shuffle_v4i32_0124: @@ -396,9 +396,9 @@ ; ; AVX1-LABEL: shuffle_v4i32_0142: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2] -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] +; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,1,0,1] +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,2,2] +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] ; AVX1-NEXT: retq ; ; AVX2OR512VL-LABEL: shuffle_v4i32_0142: @@ -441,9 +441,9 @@ ; ; AVX1-LABEL: shuffle_v4i32_0412: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2] -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] +; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1] +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,2] +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] ; AVX1-NEXT: retq ; ; AVX2OR512VL-LABEL: shuffle_v4i32_0412: @@ -483,17 +483,11 @@ ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = 
xmm1[0,1],xmm0[2,3,4,5,6,7] ; SSE41-NEXT: retq ; -; AVX1-LABEL: shuffle_v4i32_4012: -; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,2] -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7] -; AVX1-NEXT: retq -; -; AVX2OR512VL-LABEL: shuffle_v4i32_4012: -; AVX2OR512VL: # %bb.0: -; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,2] -; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] -; AVX2OR512VL-NEXT: retq +; AVX-LABEL: shuffle_v4i32_4012: +; AVX: # %bb.0: +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,2] +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } @@ -538,9 +532,9 @@ ; ; AVX1-LABEL: shuffle_v4i32_0451: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7] +; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1] +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1] +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] ; AVX1-NEXT: retq ; ; AVX2OR512VL-LABEL: shuffle_v4i32_0451: @@ -594,9 +588,9 @@ ; ; AVX1-LABEL: shuffle_v4i32_4015: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7] +; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,1,0,1] +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1] +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3] ; AVX1-NEXT: retq ; ; AVX2OR512VL-LABEL: shuffle_v4i32_4015: @@ -1191,14 +1185,14 @@ ; ; SSE41-LABEL: shuffle_v4i32_4zzz: ; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] +; SSE41-NEXT: xorps %xmm1, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; SSE41-NEXT: retq ; ; AVX1OR2-LABEL: shuffle_v4i32_4zzz: ; AVX1OR2: # %bb.0: -; AVX1OR2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1OR2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] +; AVX1OR2-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1OR2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; AVX1OR2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v4i32_4zzz: @@ -1241,16 +1235,16 @@ ; ; AVX1-LABEL: shuffle_v4i32_z4zz: ; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1] +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,1,1] ; AVX1-NEXT: retq ; ; AVX2-SLOW-LABEL: shuffle_v4i32_z4zz: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1] +; AVX2-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,1,1] ; AVX2-SLOW-NEXT: retq ; ; AVX2-FAST-LABEL: shuffle_v4i32_z4zz: @@ -1297,16 +1291,16 @@ ; ; AVX1-LABEL: shuffle_v4i32_zz4z: ; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,0,1] +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; 
AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,1] ; AVX1-NEXT: retq ; ; AVX2-SLOW-LABEL: shuffle_v4i32_zz4z: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,0,1] +; AVX2-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,1] ; AVX2-SLOW-NEXT: retq ; ; AVX2-FAST-LABEL: shuffle_v4i32_zz4z: @@ -1367,9 +1361,9 @@ ; ; AVX1-LABEL: shuffle_v4i32_z6zz: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7] +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3] +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] ; AVX1-NEXT: retq ; ; AVX2-SLOW-LABEL: shuffle_v4i32_z6zz: @@ -1694,21 +1688,15 @@ ; ; SSE41-LABEL: shuffle_v4i32_0z23: ; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] +; SSE41-NEXT: xorps %xmm1, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: shuffle_v4i32_0z23: -; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] -; AVX1-NEXT: retq -; -; AVX2OR512VL-LABEL: shuffle_v4i32_0z23: -; AVX2OR512VL: # %bb.0: -; AVX2OR512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] -; AVX2OR512VL-NEXT: retq +; AVX-LABEL: shuffle_v4i32_0z23: +; AVX: # %bb.0: +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> ret <4 x i32> %shuffle } @@ -1731,21 +1719,15 @@ ; ; SSE41-LABEL: shuffle_v4i32_01z3: ; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] +; SSE41-NEXT: xorps %xmm1, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: shuffle_v4i32_01z3: -; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] -; AVX1-NEXT: retq -; -; AVX2OR512VL-LABEL: shuffle_v4i32_01z3: -; AVX2OR512VL: # %bb.0: -; AVX2OR512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] -; AVX2OR512VL-NEXT: retq +; AVX-LABEL: shuffle_v4i32_01z3: +; AVX: # %bb.0: +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> ret <4 x i32> %shuffle } @@ -1768,21 +1750,15 @@ ; ; SSE41-LABEL: shuffle_v4i32_012z: ; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7] +; SSE41-NEXT: xorps %xmm1, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: shuffle_v4i32_012z: -; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7] -; AVX1-NEXT: retq -; -; AVX2OR512VL-LABEL: shuffle_v4i32_012z: -; AVX2OR512VL: # %bb.0: -; AVX2OR512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; 
AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3] -; AVX2OR512VL-NEXT: retq +; AVX-LABEL: shuffle_v4i32_012z: +; AVX: # %bb.0: +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> ret <4 x i32> %shuffle } @@ -1805,21 +1781,15 @@ ; ; SSE41-LABEL: shuffle_v4i32_0zz3: ; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7] +; SSE41-NEXT: xorps %xmm1, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: shuffle_v4i32_0zz3: -; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7] -; AVX1-NEXT: retq -; -; AVX2OR512VL-LABEL: shuffle_v4i32_0zz3: -; AVX2OR512VL: # %bb.0: -; AVX2OR512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] -; AVX2OR512VL-NEXT: retq +; AVX-LABEL: shuffle_v4i32_0zz3: +; AVX: # %bb.0: +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> ret <4 x i32> %shuffle } @@ -1986,18 +1956,13 @@ ; ; SSE41-LABEL: mask_v4i32_0127: ; SSE41: # %bb.0: -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: mask_v4i32_0127: -; AVX1: # %bb.0: -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] -; AVX1-NEXT: retq -; -; AVX2OR512VL-LABEL: mask_v4i32_0127: -; AVX2OR512VL: # %bb.0: -; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] -; AVX2OR512VL-NEXT: retq +; AVX-LABEL: mask_v4i32_0127: +; AVX: # %bb.0: +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] +; AVX-NEXT: retq %1 = bitcast <4 x i32> %a to <2 x i64> %2 = bitcast <4 x i32> %b to <2 x i64> %3 = and <2 x i64> %1, @@ -2189,21 +2154,15 @@ ; ; SSE41-LABEL: insert_mem_lo_v4i32: ; SSE41: # %bb.0: -; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; SSE41-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: insert_mem_lo_v4i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] -; AVX1-NEXT: retq -; -; AVX2OR512VL-LABEL: insert_mem_lo_v4i32: -; AVX2OR512VL: # %bb.0: -; AVX2OR512VL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; AVX2OR512VL-NEXT: retq +; AVX-LABEL: insert_mem_lo_v4i32: +; AVX: # %bb.0: +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] +; AVX-NEXT: retq %a = load <2 x i32>, <2 x i32>* %ptr %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> @@ -2267,12 +2226,12 @@ ; ; SSE41-LABEL: insert_reg_lo_v4f32: ; SSE41: # %bb.0: -; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE41-NEXT: retq ; ; AVX1OR2-LABEL: insert_reg_lo_v4f32: ; AVX1OR2: # %bb.0: -; AVX1OR2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX1OR2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; 
AVX1OR2-NEXT: retq ; ; AVX512VL-LABEL: insert_reg_lo_v4f32: Index: test/CodeGen/X86/vector-shuffle-128-v8.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-128-v8.ll +++ test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -2493,18 +2493,13 @@ ; ; SSE41-LABEL: mask_v8i16_012345ef: ; SSE41: # %bb.0: -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: mask_v8i16_012345ef: -; AVX1: # %bb.0: -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] -; AVX1-NEXT: retq -; -; AVX2OR512VL-LABEL: mask_v8i16_012345ef: -; AVX2OR512VL: # %bb.0: -; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] -; AVX2OR512VL-NEXT: retq +; AVX-LABEL: mask_v8i16_012345ef: +; AVX: # %bb.0: +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] +; AVX-NEXT: retq %1 = bitcast <8 x i16> %a to <2 x i64> %2 = bitcast <8 x i16> %b to <2 x i64> %3 = and <2 x i64> %1, Index: test/CodeGen/X86/vector-shuffle-256-v16.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-256-v16.ll +++ test/CodeGen/X86/vector-shuffle-256-v16.ll @@ -887,15 +887,10 @@ } define <16 x i16> @shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15(<16 x i16> %a, <16 x i16> %b) { -; AVX1-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15: -; AVX1: # %bb.0: -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3] -; AVX1-NEXT: retq -; -; AVX2OR512VL-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15: -; AVX2OR512VL: # %bb.0: -; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] -; AVX2OR512VL-NEXT: retq +; ALL-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15: +; ALL: # %bb.0: +; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] +; ALL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle } @@ -3113,7 +3108,7 @@ ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,2,3,4,5,6,7] ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4,5,6,7] -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] +; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX1-NEXT: retq ; ; AVX2OR512VL-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15: @@ -4469,15 +4464,10 @@ } define <16 x i16> @concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31(<16 x i16> %a, <16 x i16> %b) { -; AVX1-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31: -; AVX1: # %bb.0: -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] -; AVX1-NEXT: retq -; -; AVX2OR512VL-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31: -; AVX2OR512VL: # %bb.0: -; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] -; AVX2OR512VL-NEXT: retq +; ALL-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31: +; ALL: # %bb.0: +; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: retq %alo = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> %shuf = shufflevector <8 x i16> %alo, <8 x i16> %bhi, <16 x i32> Index: test/CodeGen/X86/vector-shuffle-256-v32.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-256-v32.ll +++ 
test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -1284,7 +1284,7 @@ ; AVX1-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31: ; AVX1: # %bb.0: ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[1],zero,xmm0[2],zero,xmm0[4,u,6,7,8,9,10,11,12,13,14,15] -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] +; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX1-NEXT: retq ; ; AVX2OR512VL-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31: Index: test/CodeGen/X86/vector-shuffle-256-v4.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-256-v4.ll +++ test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -367,7 +367,7 @@ define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) { ; ALL-LABEL: shuffle_v4f64_0527: ; ALL: # %bb.0: -; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3] +; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] ; ALL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle @@ -376,7 +376,7 @@ define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) { ; ALL-LABEL: shuffle_v4f64_4163: ; ALL: # %bb.0: -; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3] +; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] ; ALL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle @@ -403,7 +403,7 @@ define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) { ; ALL-LABEL: shuffle_v4f64_0167: ; ALL: # %bb.0: -; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] +; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; ALL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle @@ -461,7 +461,7 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1] ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3] -; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3] +; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v4f64_0415: @@ -588,8 +588,8 @@ ; ; AVX2-SLOW-LABEL: shuffle_v4f64_1z2z: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX2-SLOW-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3] +; AVX2-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7] ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0] ; AVX2-SLOW-NEXT: retq ; @@ -601,8 +601,8 @@ ; AVX512VL-SLOW-LABEL: shuffle_v4f64_1z2z: ; AVX512VL-SLOW: # %bb.0: ; AVX512VL-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VL-SLOW-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3] -; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0] +; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7] +; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,2,0] ; AVX512VL-SLOW-NEXT: retq ; ; AVX512VL-FAST-LABEL: shuffle_v4f64_1z2z: @@ -825,7 +825,7 @@ ; AVX1: # %bb.0: ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3] +; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuffle_v4i64_0124: @@ -915,7 +915,7 @@ ; AVX1-NEXT: vpalignr 
{{.*#+}} xmm2 = xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_4012:
@@ -1339,15 +1339,15 @@
; AVX1-LABEL: insert_reg_and_zero_v4f64:
; AVX1: # %bb.0:
; AVX1-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0
-; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_reg_and_zero_v4f64:
; AVX2: # %bb.0:
; AVX2-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0
-; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_reg_and_zero_v4f64:
@@ -1512,20 +1512,10 @@
}
define <4 x i64> @concat_v4i64_0167(<4 x i64> %a0, <4 x i64> %a1) {
-; AVX1-LABEL: concat_v4i64_0167:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: concat_v4i64_0167:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: concat_v4i64_0167:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: concat_v4i64_0167:
+; ALL: # %bb.0:
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; ALL-NEXT: retq
%a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32>
%a1hi = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32>
%shuffle64 = shufflevector <2 x i64> %a0lo, <2 x i64> %a1hi, <4 x i32>
Index: test/CodeGen/X86/vector-shuffle-256-v8.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -110,7 +110,7 @@
; AVX1-LABEL: shuffle_v8f32_06000000:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
; AVX1-NEXT: retq
;
@@ -127,7 +127,7 @@
; AVX1-LABEL: shuffle_v8f32_70000000:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
; AVX1-NEXT: retq
;
@@ -663,7 +663,7 @@
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1],ymm1[2,3,4,5],ymm2[6,7]
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
; AVX1-NEXT: retq
;
@@ -830,7 +830,7 @@
define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_3210fedc:
; ALL: # %bb.0:
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
@@ -865,7 +865,7 @@
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: PR21138:
@@ -892,7 +892,7 @@
define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_ba987654:
; ALL: # %bb.0:
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
@@ -1106,7 +1106,7 @@
; AVX1-LABEL: shuffle_v8i32_06000000:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
; AVX1-NEXT: retq
;
@@ -1123,7 +1123,7 @@
; AVX1-LABEL: shuffle_v8i32_70000000:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
; AVX1-NEXT: retq
;
@@ -1913,17 +1913,11 @@
}
define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_3210fedc:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_3210fedc:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_3210fedc:
+; ALL: # %bb.0:
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
ret <8 x i32> %shuffle
}
@@ -1961,33 +1955,21 @@
}
define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_ba987654:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_ba987654:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_ba987654:
+; ALL: # %bb.0:
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
ret <8 x i32> %shuffle
}
define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: shuffle_v8i32_ba983210:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: shuffle_v8i32_ba983210:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; AVX2OR512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8i32_ba983210:
+; ALL: # %bb.0:
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
ret <8 x i32> %shuffle
}
@@ -2313,15 +2295,10 @@
}
define <8 x i32> @concat_v8i32_0123CDEF(<8 x i32> %a, <8 x i32> %b) {
-; AVX1-LABEL: concat_v8i32_0123CDEF:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
-; AVX1-NEXT: retq
-;
-; AVX2OR512VL-LABEL: concat_v8i32_0123CDEF:
-; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX2OR512VL-NEXT: retq
+; ALL-LABEL: concat_v8i32_0123CDEF:
+; ALL: # %bb.0:
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; ALL-NEXT: retq
%alo = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32>
%bhi = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32>
%shuf = shufflevector <4 x i32> %alo, <4 x i32> %bhi, <8 x i32>
Index: test/CodeGen/X86/vector-shuffle-512-v16.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-512-v16.ll
+++ test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -274,7 +274,7 @@
define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
; ALL: # %bb.0:
-; ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
+; ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; ALL-NEXT: retq
%c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32>
ret <16 x i32> %c
@@ -296,10 +296,10 @@
define <4 x i32> @test_v16i32_0_1_2_12 (<16 x i32> %v) {
; ALL-LABEL: test_v16i32_0_1_2_12:
; ALL: # %bb.0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vextractf128 $1, %ymm1, %xmm1
-; ALL-NEXT: vbroadcastss %xmm1, %xmm1
-; ALL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vextracti128 $1, %ymm1, %xmm1
+; ALL-NEXT: vpbroadcastd %xmm1, %xmm1
+; ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; ALL-NEXT: vzeroupper
; ALL-NEXT: retq
%res = shufflevector <16 x i32> %v, <16 x i32> undef, <4 x i32>
Index: test/CodeGen/X86/vector-shuffle-512-v8.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -2684,7 +2684,7 @@
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,2]
-; AVX512F-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; AVX512F-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: test_v8f64_2346:
@@ -2692,7 +2692,7 @@
; AVX512F-32-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512F-32-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; AVX512F-32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,2]
-; AVX512F-32-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; AVX512F-32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-32-NEXT: retl
%res = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32>
ret <4 x double> %res
@@ -2744,7 +2744,7 @@
; AVX512F-LABEL: test_v8i64_2_5:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512F-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,2,3]
; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX512F-NEXT: vzeroupper
@@ -2753,7 +2753,7 @@
; AVX512F-32-LABEL: test_v8i64_2_5:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512F-32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
+; AVX512F-32-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX512F-32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,2,3]
; AVX512F-32-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX512F-32-NEXT: vzeroupper
Index: test/CodeGen/X86/vector-shuffle-avx512.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-avx512.ll
+++ test/CodeGen/X86/vector-shuffle-avx512.ll
@@ -91,8 +91,8 @@
; KNL64: # %bb.0:
; KNL64-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0
; KNL64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1]
-; KNL64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; KNL64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
+; KNL64-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; KNL64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; KNL64-NEXT: retq
;
; SKX32-LABEL: expand2:
@@ -107,8 +107,8 @@
; KNL32: # %bb.0:
; KNL32-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0
; KNL32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1]
-; KNL32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; KNL32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
+; KNL32-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; KNL32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; KNL32-NEXT: retl
%res = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <4 x i32>
ret <4 x double> %res
Index: test/CodeGen/X86/vector-shuffle-combining-avx.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -203,14 +203,14 @@
define <4 x double> @combine_vperm2f128_vpermilvar_as_vpblendpd(<4 x double> %a0) {
; X32-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd:
; X32: # %bb.0:
-; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X32-NEXT: retl
;
; X64-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd:
; X64: # %bb.0:
-; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-NEXT: retq
%1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> )
%2 = shufflevector <4 x double> %1, <4 x double> zeroinitializer, <4 x i32>
Index: test/CodeGen/X86/vector-shuffle-combining-avx2.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -522,8 +522,8 @@
define <4 x double> @combine_pshufb_as_vzmovl_64(<4 x double> %a0) {
; X32-AVX2-LABEL: combine_pshufb_as_vzmovl_64:
; X32-AVX2: # %bb.0:
-; X32-AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; X32-AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; X32-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; X32-AVX2-NEXT: retl
;
; X32-AVX512-LABEL: combine_pshufb_as_vzmovl_64:
@@ -534,8 +534,8 @@
;
; X64-AVX2-LABEL: combine_pshufb_as_vzmovl_64:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; X64-AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; X64-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: combine_pshufb_as_vzmovl_64:
@@ -978,20 +978,20 @@
; X32-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X32-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X32-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
-; X32-AVX2-NEXT: vmovapd {{.*#+}} ymm2 =
-; X32-AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
-; X32-AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; X32-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 =
+; X32-AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
+; X32-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; X32-AVX2-NEXT: retl
;
; X32-AVX512-LABEL: PR34577:
; X32-AVX512: # %bb.0: # %entry
-; X32-AVX512-NEXT: vmovapd {{.*#+}} ymm2 = <1,u,u,u,2,u,5,0>
+; X32-AVX512-NEXT: vmovaps {{.*#+}} ymm2 = <1,u,u,u,2,u,5,0>
; X32-AVX512-NEXT: vpermps %ymm0, %ymm2, %ymm0
-; X32-AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; X32-AVX512-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2,3]
-; X32-AVX512-NEXT: vmovapd {{.*#+}} ymm2 =
+; X32-AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X32-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; X32-AVX512-NEXT: vmovaps {{.*#+}} ymm2 =
; X32-AVX512-NEXT: vpermps %ymm1, %ymm2, %ymm1
-; X32-AVX512-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; X32-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; X32-AVX512-NEXT: retl
;
; X64-AVX2-LABEL: PR34577:
@@ -999,20 +999,20 @@
; X64-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X64-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
-; X64-AVX2-NEXT: vmovapd {{.*#+}} ymm2 =
-; X64-AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
-; X64-AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; X64-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 =
+; X64-AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
+; X64-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: PR34577:
; X64-AVX512: # %bb.0: # %entry
-; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm2 = <1,u,u,u,2,u,5,0>
+; X64-AVX512-NEXT: vmovaps {{.*#+}} ymm2 = <1,u,u,u,2,u,5,0>
; X64-AVX512-NEXT: vpermps %ymm0, %ymm2, %ymm0
-; X64-AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; X64-AVX512-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2,3]
-; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm2 =
+; X64-AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X64-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; X64-AVX512-NEXT: vmovaps {{.*#+}} ymm2 =
; X64-AVX512-NEXT: vpermps %ymm1, %ymm2, %ymm1
-; X64-AVX512-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; X64-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; X64-AVX512-NEXT: retq
entry:
%shuf0 = shufflevector <8 x float> %inp0, <8 x float> %inp2, <8 x i32>
Index: test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
+++ test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
@@ -49,17 +49,17 @@
;
; SSE41-LABEL: combine_pshufb_as_movsd:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_pshufb_as_movsd:
; AVX1: # %bb.0:
-; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_pshufb_as_movsd:
; AVX2: # %bb.0:
-; AVX2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: combine_pshufb_as_movsd:
Index: test/CodeGen/X86/vector-shuffle-combining.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-combining.ll
+++ test/CodeGen/X86/vector-shuffle-combining.ll
@@ -298,21 +298,15 @@
;
; SSE41-LABEL: combine_bitwise_ops_test1b:
; SSE41: # %bb.0:
-; SSE41-NEXT: pand %xmm1, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; SSE41-NEXT: andps %xmm1, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_bitwise_ops_test1b:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_bitwise_ops_test1b:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_bitwise_ops_test1b:
+; AVX: # %bb.0:
+; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
%and = and <4 x i32> %shuf1, %shuf2
@@ -338,21 +332,15 @@
;
; SSE41-LABEL: combine_bitwise_ops_test2b:
; SSE41: # %bb.0:
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; SSE41-NEXT: orps %xmm1, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_bitwise_ops_test2b:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_bitwise_ops_test2b:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_bitwise_ops_test2b:
+; AVX: # %bb.0:
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
%or = or <4 x i32> %shuf1, %shuf2
@@ -374,24 +362,17 @@
;
; SSE41-LABEL: combine_bitwise_ops_test3b:
; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; SSE41-NEXT: xorps %xmm1, %xmm0
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_bitwise_ops_test3b:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_bitwise_ops_test3b:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_bitwise_ops_test3b:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32>
%xor = xor <4 x i32> %shuf1, %shuf2
@@ -417,21 +398,15 @@
;
; SSE41-LABEL: combine_bitwise_ops_test4b:
; SSE41: # %bb.0:
-; SSE41-NEXT: pand %xmm1, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
+; SSE41-NEXT: andps %xmm1, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_bitwise_ops_test4b:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_bitwise_ops_test4b:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_bitwise_ops_test4b:
+; AVX: # %bb.0:
+; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
%shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
%and = and <4 x i32> %shuf1, %shuf2
@@ -457,21 +432,15 @@
;
; SSE41-LABEL: combine_bitwise_ops_test5b:
; SSE41: # %bb.0:
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
+; SSE41-NEXT: orps %xmm1, %xmm0
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_bitwise_ops_test5b:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_bitwise_ops_test5b:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_bitwise_ops_test5b:
+; AVX: # %bb.0:
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
%shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
%or = or <4 x i32> %shuf1, %shuf2
@@ -493,24 +462,17 @@
;
; SSE41-LABEL: combine_bitwise_ops_test6b:
; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
+; SSE41-NEXT: xorps %xmm1, %xmm0
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_bitwise_ops_test6b:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_bitwise_ops_test6b:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_bitwise_ops_test6b:
+; AVX: # %bb.0:
+; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
+; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32>
%shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32>
%xor = xor <4 x i32> %shuf1, %shuf2
@@ -904,9 +866,9 @@
;
; AVX1-LABEL: combine_nested_undef_test15:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,1]
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test15:
@@ -941,17 +903,11 @@
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_nested_undef_test16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_nested_undef_test16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_nested_undef_test16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
ret <4 x i32> %2
@@ -976,17 +932,11 @@
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_nested_undef_test17:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_nested_undef_test17:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,1]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_nested_undef_test17:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,1]
+; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
ret <4 x i32> %2
@@ -1026,17 +976,11 @@
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_nested_undef_test19:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_nested_undef_test19:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_nested_undef_test19:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0]
+; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
ret <4 x i32> %2
@@ -1063,17 +1007,11 @@
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,3,0]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_nested_undef_test20:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,3,0]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_nested_undef_test20:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,3,0]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_nested_undef_test20:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,3,0]
+; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
ret <4 x i32> %2
@@ -1100,8 +1038,8 @@
;
; AVX1-LABEL: combine_nested_undef_test21:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test21:
@@ -1362,18 +1300,13 @@
;
; SSE41-LABEL: combine_test7:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_test7:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_test7:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_test7:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
%2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32>
ret <4 x i32> %2
@@ -1425,18 +1358,13 @@
;
; SSE41-LABEL: combine_test10:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_test10:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_test10:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_test10:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
%2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32>
ret <4 x i32> %2
@@ -1559,18 +1487,13 @@
;
; SSE41-LABEL: combine_test17:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_test17:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_test17:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_test17:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
%2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32>
ret <4 x i32> %2
@@ -1621,18 +1544,13 @@
;
; SSE41-LABEL: combine_test20:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: combine_test20:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: combine_test20:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: combine_test20:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
%2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32>
ret <4 x i32> %2
@@ -1760,13 +1678,13 @@
;
; SSE41-LABEL: combine_test3b:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_test3b:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,3,2,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
@@ -2020,12 +1938,12 @@
;
; SSE41-LABEL: combine_blend_01:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_blend_01:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
%shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32>
@@ -2154,12 +2072,12 @@
;
; SSE41-LABEL: combine_undef_input_test1:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test1:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32>
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32>
@@ -2226,12 +2144,12 @@
;
; SSE41-LABEL: combine_undef_input_test5:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test5:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32>
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32>
@@ -2338,12 +2256,12 @@
;
; SSE41-LABEL: combine_undef_input_test11:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test11:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32>
%2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32>
@@ -2410,12 +2328,12 @@
;
; SSE41-LABEL: combine_undef_input_test15:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test15:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32>
%2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32>
@@ -2866,8 +2784,8 @@
;
; SSE41-LABEL: PR22412:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
-; SSE41-NEXT: movapd %xmm0, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
+; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm3[3,2]
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm0[3,2]
; SSE41-NEXT: movaps %xmm1, %xmm0
@@ -2876,22 +2794,22 @@
;
; AVX1-LABEL: PR22412:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0],ymm1[3,2],ymm0[5,4],ymm1[7,6]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: PR22412:
; AVX2-SLOW: # %bb.0: # %entry
-; AVX2-SLOW-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,1]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: PR22412:
; AVX2-FAST: # %bb.0: # %entry
-; AVX2-FAST-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; AVX2-FAST-NEXT: vmovapd {{.*#+}} ymm1 = [1,0,7,6,5,4,3,2]
+; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [1,0,7,6,5,4,3,2]
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: retq
entry:
Index: test/CodeGen/X86/vselect-2.ll
===================================================================
--- test/CodeGen/X86/vselect-2.ll
+++ test/CodeGen/X86/vselect-2.ll
@@ -13,18 +13,13 @@
;
; SSE41-LABEL: test1:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: test1:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test1:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: test1:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX-NEXT: retq
%select = select <4 x i1>, <4 x i32> %A, <4 x i32> %B
ret <4 x i32> %select
}
@@ -37,18 +32,13 @@
;
; SSE41-LABEL: test2:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: test2:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test2:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: test2:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX-NEXT: retq
%select = select <4 x i1>, <4 x i32> %A, <4 x i32> %B
ret <4 x i32> %select
}
@@ -62,12 +52,12 @@
;
; SSE41-LABEL: test3:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test3:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: retq
%select = select <4 x i1>, <4 x float> %A, <4 x float> %B
ret <4 x float> %select
@@ -81,12 +71,12 @@
;
; SSE41-LABEL: test4:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test4:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: retq
%select = select <4 x i1>, <4 x float> %A, <4 x float> %B
ret <4 x float> %select
Index: test/CodeGen/X86/vselect.ll
===================================================================
--- test/CodeGen/X86/vselect.ll
+++ test/CodeGen/X86/vselect.ll
@@ -36,12 +36,12 @@
;
; SSE41-LABEL: test2:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test2:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: retq
%1 = select <4 x i1> , <4 x float> %a, <4 x float> %b
ret <4 x float> %1
@@ -55,12 +55,12 @@
;
; SSE41-LABEL: test3:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test3:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: retq
%1 = select <4 x i1> , <4 x float> %a, <4 x float> %b
ret <4 x float> %1
@@ -113,18 +113,13 @@
;
; SSE41-LABEL: test7:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: test7:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test7:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: test7:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX-NEXT: retq
%1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b
ret <8 x i16> %1
}
@@ -137,18 +132,13 @@
;
; SSE41-LABEL: test8:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: test8:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: test8:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX-NEXT: retq
%1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b
ret <8 x i16> %1
}
@@ -310,18 +300,13 @@
;
; SSE41-LABEL: test19:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: test19:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test19:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: test19:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: retq
%1 = select <4 x i1> , <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %1
}
@@ -334,12 +319,12 @@
;
; SSE41-LABEL: test20:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test20:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: retq
%1 = select <2 x i1> , <2 x double> %a, <2 x double> %b
ret <2 x double> %1
@@ -353,18 +338,13 @@
;
; SSE41-LABEL: test21:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: test21:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test21:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: test21:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX-NEXT: retq
%1 = select <2 x i1> , <2 x i64> %a, <2 x i64> %b
ret <2 x i64> %1
}
@@ -398,18 +378,13 @@
;
; SSE41-LABEL: test23:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: test23:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test23:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: test23:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: retq
%1 = select <4 x i1> , <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %1
}
@@ -423,12 +398,12 @@
;
; SSE41-LABEL: test24:
; SSE41: # %bb.0:
-; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test24:
; AVX: # %bb.0:
-; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: retq
%1 = select <2 x i1> , <2 x double> %a, <2 x double> %b
ret <2 x double> %1
@@ -443,18 +418,13 @@
;
; SSE41-LABEL: test25:
; SSE41: # %bb.0:
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: test25:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test25:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; AVX2-NEXT: retq
+; AVX-LABEL: test25:
+; AVX: # %bb.0:
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX-NEXT: retq
%1 = select <2 x i1> , <2 x i64> %a, <2 x i64> %b
ret <2 x i64> %1
}
Index: test/CodeGen/X86/x86-interleaved-access.ll
===================================================================
--- test/CodeGen/X86/x86-interleaved-access.ll
+++ test/CodeGen/X86/x86-interleaved-access.ll
@@ -691,7 +691,7 @@
; AVX1-NEXT: vpshufb %xmm0, %xmm10, %xmm0
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm5[4,5,6,7]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm8 = ymm0[0,1],ymm8[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm8 = ymm0[0,1,2,3],ymm8[4,5,6,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 =
; AVX1-NEXT: vpshufb %xmm0, %xmm11, %xmm4
; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm5
@@ -709,7 +709,7 @@
; AVX1-NEXT: vpshufb %xmm5, %xmm10, %xmm5
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm4[0,1,2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm9 = ymm0[0,1],ymm1[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 =
; AVX1-NEXT: vpshufb %xmm0, %xmm11, %xmm1
; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm4
@@ -727,7 +727,7 @@
; AVX1-NEXT: vpshufb %xmm4, %xmm10, %xmm4
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm4[0,1,2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 =
; AVX1-NEXT: vpshufb %xmm1, %xmm11, %xmm4
; AVX1-NEXT: vpshufb %xmm1, %xmm3, %xmm3
@@ -745,7 +745,7 @@
; AVX1-NEXT: vpshufb %xmm4, %xmm10, %xmm4
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0,1],ymm2[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
; AVX1-NEXT: vpcmpeqb %xmm9, %xmm8, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm8, %xmm4