Index: lib/Target/X86/X86InstrTablesInfo.h
===================================================================
--- lib/Target/X86/X86InstrTablesInfo.h
+++ lib/Target/X86/X86InstrTablesInfo.h
@@ -47,9 +47,7 @@
 { X86::VCOMISDZrr , X86::VCOMISDrr },
 { X86::VCOMISSZrm , X86::VCOMISSrm },
 { X86::VCOMISSZrr , X86::VCOMISSrr },
- { X86::VCVTSD2SI64Zrm , X86::VCVTSD2SI64rm },
 { X86::VCVTSD2SI64Zrr , X86::VCVTSD2SI64rr },
- { X86::VCVTSD2SIZrm , X86::VCVTSD2SIrm },
 { X86::VCVTSD2SIZrr , X86::VCVTSD2SIrr },
 { X86::VCVTSD2SSZrm , X86::VCVTSD2SSrm },
 { X86::VCVTSD2SSZrr , X86::VCVTSD2SSrr },
@@ -61,11 +59,17 @@
 { X86::VCVTSI2SSZrm_Int , X86::Int_VCVTSI2SSrm },
 { X86::VCVTSI2SSZrr , X86::VCVTSI2SSrr },
 { X86::VCVTSI2SSZrr_Int , X86::Int_VCVTSI2SSrr },
+ { X86::VCVTSI642SDZrm , X86::VCVTSI2SD64rm },
+ { X86::VCVTSI642SDZrm_Int , X86::Int_VCVTSI2SD64rm },
+ { X86::VCVTSI642SDZrr , X86::VCVTSI2SD64rr },
+ { X86::VCVTSI642SDZrr_Int , X86::Int_VCVTSI2SD64rr },
+ { X86::VCVTSI642SSZrm , X86::VCVTSI2SS64rm },
+ { X86::VCVTSI642SSZrm_Int , X86::Int_VCVTSI2SS64rm },
+ { X86::VCVTSI642SSZrr , X86::VCVTSI2SS64rr },
+ { X86::VCVTSI642SSZrr_Int , X86::Int_VCVTSI2SS64rr },
 { X86::VCVTSS2SDZrm , X86::VCVTSS2SDrm },
 { X86::VCVTSS2SDZrr , X86::VCVTSS2SDrr },
- { X86::VCVTSS2SI64Zrm , X86::VCVTSS2SI64rm },
 { X86::VCVTSS2SI64Zrr , X86::VCVTSS2SI64rr },
- { X86::VCVTSS2SIZrm , X86::VCVTSS2SIrm },
 { X86::VCVTSS2SIZrr , X86::VCVTSS2SIrr },
 { X86::VCVTTSD2SI64Zrm , X86::VCVTTSD2SI64rm },
 { X86::VCVTTSD2SI64Zrm_Int , X86::Int_VCVTTSD2SI64rm },
@@ -91,6 +95,8 @@
 { X86::VDIVSSZrm_Int , X86::VDIVSSrm_Int },
 { X86::VDIVSSZrr , X86::VDIVSSrr },
 { X86::VDIVSSZrr_Int , X86::VDIVSSrr_Int },
+ { X86::VEXTRACTPSZmr , X86::VEXTRACTPSmr },
+ { X86::VEXTRACTPSZrr , X86::VEXTRACTPSrr },
 { X86::VFMADD132SDZm , X86::VFMADD132SDm },
 { X86::VFMADD132SDZm_Int , X86::VFMADD132SDm_Int },
 { X86::VFMADD132SDZr , X86::VFMADD132SDr },
@@ -187,36 +193,43 @@
 { X86::VFNMSUB231SSZm_Int , X86::VFNMSUB231SSm_Int },
 { X86::VFNMSUB231SSZr , X86::VFNMSUB231SSr },
 { X86::VFNMSUB231SSZr_Int , X86::VFNMSUB231SSr_Int },
+ { X86::VINSERTPSZrm , X86::VINSERTPSrm },
+ { X86::VINSERTPSZrr , X86::VINSERTPSrr },
 { X86::VMAXCSDZrm , X86::VMAXCSDrm },
 { X86::VMAXCSDZrr , X86::VMAXCSDrr },
 { X86::VMAXCSSZrm , X86::VMAXCSSrm },
 { X86::VMAXCSSZrr , X86::VMAXCSSrr },
- { X86::VMAXSDZrm , X86::VMAXSDrm },
+ { X86::VMAXSDZrm , X86::VMAXCSDrm },
 { X86::VMAXSDZrm_Int , X86::VMAXSDrm_Int },
- { X86::VMAXSDZrr , X86::VMAXSDrr },
+ { X86::VMAXSDZrr , X86::VMAXCSDrr },
 { X86::VMAXSDZrr_Int , X86::VMAXSDrr_Int },
- { X86::VMAXSSZrm , X86::VMAXSSrm },
+ { X86::VMAXSSZrm , X86::VMAXCSSrm },
 { X86::VMAXSSZrm_Int , X86::VMAXSSrm_Int },
- { X86::VMAXSSZrr , X86::VMAXSSrr },
+ { X86::VMAXSSZrr , X86::VMAXCSSrr },
 { X86::VMAXSSZrr_Int , X86::VMAXSSrr_Int },
 { X86::VMINCSDZrm , X86::VMINCSDrm },
 { X86::VMINCSDZrr , X86::VMINCSDrr },
 { X86::VMINCSSZrm , X86::VMINCSSrm },
 { X86::VMINCSSZrr , X86::VMINCSSrr },
- { X86::VMINSDZrm , X86::VMINSDrm },
+ { X86::VMINSDZrm , X86::VMINCSDrm },
 { X86::VMINSDZrm_Int , X86::VMINSDrm_Int },
- { X86::VMINSDZrr , X86::VMINSDrr },
+ { X86::VMINSDZrr , X86::VMINCSDrr },
 { X86::VMINSDZrr_Int , X86::VMINSDrr_Int },
- { X86::VMINSSZrm , X86::VMINSSrm },
+ { X86::VMINSSZrm , X86::VMINCSSrm },
 { X86::VMINSSZrm_Int , X86::VMINSSrm_Int },
- { X86::VMINSSZrr , X86::VMINSSrr },
+ { X86::VMINSSZrr , X86::VMINCSSrr },
 { X86::VMINSSZrr_Int , X86::VMINSSrr_Int },
 { X86::VMOV64toSDZrr , X86::VMOV64toSDrr },
 { X86::VMOVDI2SSZrm , X86::VMOVDI2SSrm },
 { X86::VMOVDI2SSZrr , X86::VMOVDI2SSrr },
+ { X86::VMOVSDto64Zmr , X86::VMOVSDto64mr },
+ { X86::VMOVSDto64Zrr , X86::VMOVSDto64rr },
 { X86::VMOVSDZmr , X86::VMOVSDmr },
 { X86::VMOVSDZrm , X86::VMOVSDrm },
 { X86::VMOVSDZrr , X86::VMOVSDrr },
+ { X86::VMOVSDZrr_REV , X86::VMOVSDrr_REV },
+ { X86::VMOVSS2DIZmr , X86::VMOVSS2DImr },
+ { X86::VMOVSS2DIZrr , X86::VMOVSS2DIrr },
 { X86::VMOVSSZmr , X86::VMOVSSmr },
 { X86::VMOVSSZrm , X86::VMOVSSrm },
 { X86::VMOVSSZrr , X86::VMOVSSrr },
@@ -250,6 +263,7 @@
 { X86::VUCOMISSZrm , X86::VUCOMISSrm },
 { X86::VUCOMISSZrr , X86::VUCOMISSrr },
+ { X86::VMOV64toPQIZrm , X86::VMOV64toPQIrm },
 { X86::VMOV64toPQIZrr , X86::VMOV64toPQIrr },
 { X86::VMOV64toSDZrr , X86::VMOV64toSDrr },
 { X86::VMOVDI2PDIZrm , X86::VMOVDI2PDIrm },
@@ -259,6 +273,8 @@
 { X86::VMOVPDI2DIZmr , X86::VMOVPDI2DImr },
 { X86::VMOVPDI2DIZrr , X86::VMOVPDI2DIrr },
 { X86::VMOVPQI2QIZmr , X86::VMOVPQI2QImr },
+ { X86::VMOVPQI2QIZrr , X86::VMOVPQI2QIrr },
+ { X86::VMOVPQIto64Zmr , X86::VMOVPQIto64mr },
 { X86::VMOVPQIto64Zrr , X86::VMOVPQIto64rr },
 { X86::VMOVQI2PQIZrm , X86::VMOVQI2PQIrm },
 { X86::VMOVZPQILo2PQIZrr , X86::VMOVZPQILo2PQIrr },
@@ -271,6 +287,7 @@
 { X86::VPEXTRQZrr , X86::VPEXTRQrr },
 { X86::VPEXTRWZmr , X86::VPEXTRWmr },
 { X86::VPEXTRWZrr , X86::VPEXTRWri },
+ { X86::VPEXTRWZrr_REV , X86::VPEXTRWrr_REV },
 { X86::VPINSRBZrm , X86::VPINSRBrm },
 { X86::VPINSRBZrr , X86::VPINSRBrr },
@@ -294,6 +311,8 @@
 { X86::VANDPDZ128rr , X86::VANDPDrr },
 { X86::VANDPSZ128rm , X86::VANDPSrm },
 { X86::VANDPSZ128rr , X86::VANDPSrr },
+ { X86::VBROADCASTI32X2Z128m , X86::VPBROADCASTQrm },
+ { X86::VBROADCASTI32X2Z128r , X86::VPBROADCASTQrr },
 { X86::VBROADCASTSSZ128m , X86::VBROADCASTSSrm },
 { X86::VBROADCASTSSZ128r , X86::VBROADCASTSSrr },
 { X86::VCVTDQ2PDZ128rm , X86::VCVTDQ2PDrm },
@@ -396,18 +415,18 @@
 { X86::VMAXCPDZ128rr , X86::VMAXCPDrr },
 { X86::VMAXCPSZ128rm , X86::VMAXCPSrm },
 { X86::VMAXCPSZ128rr , X86::VMAXCPSrr },
- { X86::VMAXPDZ128rm , X86::VMAXPDrm },
- { X86::VMAXPDZ128rr , X86::VMAXPDrr },
- { X86::VMAXPSZ128rm , X86::VMAXPSrm },
- { X86::VMAXPSZ128rr , X86::VMAXPSrr },
+ { X86::VMAXPDZ128rm , X86::VMAXCPDrm },
+ { X86::VMAXPDZ128rr , X86::VMAXCPDrr },
+ { X86::VMAXPSZ128rm , X86::VMAXCPSrm },
+ { X86::VMAXPSZ128rr , X86::VMAXCPSrr },
 { X86::VMINCPDZ128rm , X86::VMINCPDrm },
 { X86::VMINCPDZ128rr , X86::VMINCPDrr },
 { X86::VMINCPSZ128rm , X86::VMINCPSrm },
 { X86::VMINCPSZ128rr , X86::VMINCPSrr },
- { X86::VMINPDZ128rm , X86::VMINPDrm },
- { X86::VMINPDZ128rr , X86::VMINPDrr },
- { X86::VMINPSZ128rm , X86::VMINPSrm },
- { X86::VMINPSZ128rr , X86::VMINPSrr },
+ { X86::VMINPDZ128rm , X86::VMINCPDrm },
+ { X86::VMINPDZ128rr , X86::VMINCPDrr },
+ { X86::VMINPSZ128rm , X86::VMINCPSrm },
+ { X86::VMINPSZ128rr , X86::VMINCPSrr },
 { X86::VMOVAPDZ128mr , X86::VMOVAPDmr },
 { X86::VMOVAPDZ128rm , X86::VMOVAPDrm },
 { X86::VMOVAPDZ128rr , X86::VMOVAPDrr },
@@ -510,6 +529,10 @@
 { X86::VPANDDZ128rr , X86::VPANDrr },
 { X86::VPANDQZ128rm , X86::VPANDrm },
 { X86::VPANDQZ128rr , X86::VPANDrr },
+ { X86::VPANDNDZ128rm , X86::VPANDNrm },
+ { X86::VPANDNDZ128rr , X86::VPANDNrr },
+ { X86::VPANDNQZ128rm , X86::VPANDNrm },
+ { X86::VPANDNQZ128rr , X86::VPANDNrr },
 { X86::VPAVGBZ128rm , X86::VPAVGBrm },
 { X86::VPAVGBZ128rr , X86::VPAVGBrr },
 { X86::VPAVGWZ128rm , X86::VPAVGWrm },
 { X86::VPAVGWZ128rr , X86::VPAVGWrr },
@@ -724,6 +747,12 @@
 { X86::VANDPDZ256rr , X86::VANDPDYrr },
 { X86::VANDPSZ256rm , X86::VANDPSYrm },
 { X86::VANDPSZ256rr , X86::VANDPSYrr },
+ { X86::VBROADCASTF32X2Z256m , X86::VBROADCASTSDYrm },
+ { X86::VBROADCASTF32X2Z256r , X86::VBROADCASTSDYrr },
+ { X86::VBROADCASTF32X4Z256rm , X86::VBROADCASTF128 },
+ { X86::VBROADCASTI32X2Z256m , X86::VPBROADCASTQYrm },
+ { X86::VBROADCASTI32X2Z256r , X86::VPBROADCASTQYrr },
+ { X86::VBROADCASTI32X4Z256rm , X86::VBROADCASTI128 },
 { X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm },
 { X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr },
 { X86::VBROADCASTSSZ256m , X86::VBROADCASTSSYrm },
 { X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr },
 { X86::VCVTDQ2PDZ256rm , X86::VCVTDQ2PDYrm },
@@ -844,18 +873,18 @@
 { X86::VMAXCPDZ256rr , X86::VMAXCPDYrr },
 { X86::VMAXCPSZ256rm , X86::VMAXCPSYrm },
 { X86::VMAXCPSZ256rr , X86::VMAXCPSYrr },
- { X86::VMAXPDZ256rm , X86::VMAXPDYrm },
- { X86::VMAXPDZ256rr , X86::VMAXPDYrr },
- { X86::VMAXPSZ256rm , X86::VMAXPSYrm },
- { X86::VMAXPSZ256rr , X86::VMAXPSYrr },
+ { X86::VMAXPDZ256rm , X86::VMAXCPDYrm },
+ { X86::VMAXPDZ256rr , X86::VMAXCPDYrr },
+ { X86::VMAXPSZ256rm , X86::VMAXCPSYrm },
+ { X86::VMAXPSZ256rr , X86::VMAXCPSYrr },
 { X86::VMINCPDZ256rm , X86::VMINCPDYrm },
 { X86::VMINCPDZ256rr , X86::VMINCPDYrr },
 { X86::VMINCPSZ256rm , X86::VMINCPSYrm },
 { X86::VMINCPSZ256rr , X86::VMINCPSYrr },
- { X86::VMINPDZ256rm , X86::VMINPDYrm },
- { X86::VMINPDZ256rr , X86::VMINPDYrr },
- { X86::VMINPSZ256rm , X86::VMINPSYrm },
- { X86::VMINPSZ256rr , X86::VMINPSYrr },
+ { X86::VMINPDZ256rm , X86::VMINCPDYrm },
+ { X86::VMINPDZ256rr , X86::VMINCPDYrr },
+ { X86::VMINPSZ256rm , X86::VMINCPSYrm },
+ { X86::VMINPSZ256rr , X86::VMINCPSYrr },
 { X86::VMOVAPDZ256mr , X86::VMOVAPDYmr },
 { X86::VMOVAPDZ256rm , X86::VMOVAPDYrm },
 { X86::VMOVAPDZ256rr , X86::VMOVAPDYrr },
@@ -950,6 +979,10 @@
 { X86::VPANDDZ256rr , X86::VPANDYrr },
 { X86::VPANDQZ256rm , X86::VPANDYrm },
 { X86::VPANDQZ256rr , X86::VPANDYrr },
+ { X86::VPANDNDZ256rm , X86::VPANDNYrm },
+ { X86::VPANDNDZ256rr , X86::VPANDNYrr },
+ { X86::VPANDNQZ256rm , X86::VPANDNYrm },
+ { X86::VPANDNQZ256rr , X86::VPANDNYrr },
 { X86::VPAVGBZ256rm , X86::VPAVGBYrm },
 { X86::VPAVGBZ256rr , X86::VPAVGBYrr },
 { X86::VPAVGWZ256rm , X86::VPAVGWYrm },
Index: test/CodeGen/X86/avx512-mov.ll
===================================================================
--- test/CodeGen/X86/avx512-mov.ll
+++ test/CodeGen/X86/avx512-mov.ll
@@ -4,7 +4,7 @@
 define i32 @test1(float %x) {
 ; CHECK-LABEL: test1:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovd %xmm0, %eax ## encoding: [0x62,0xf1,0x7d,0x08,0x7e,0xc0]
+; CHECK-NEXT: vmovd %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = bitcast float %x to i32
 ret i32 %res
Index: test/CodeGen/X86/avx512-vbroadcasti128.ll
===================================================================
--- test/CodeGen/X86/avx512-vbroadcasti128.ll
+++ test/CodeGen/X86/avx512-vbroadcasti128.ll
@@ -10,13 +10,13 @@
 define <4 x double> @test_broadcast_2f64_4f64(<2 x double> *%p) nounwind {
 ; X64-AVX512VL-LABEL: test_broadcast_2f64_4f64:
 ; X64-AVX512VL: ## BB#0:
-; X64-AVX512VL-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512VL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512VL-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0
 ; X64-AVX512VL-NEXT: retq
 ;
 ; X64-AVX512BWVL-LABEL: test_broadcast_2f64_4f64:
 ; X64-AVX512BWVL: ## BB#0:
-; X64-AVX512BWVL-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512BWVL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512BWVL-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0
 ; X64-AVX512BWVL-NEXT: retq
 ;
@@ -34,13 +34,13 @@
 define <4 x i64> @test_broadcast_2i64_4i64(<2 x i64> *%p) nounwind {
 ; X64-AVX512VL-LABEL: test_broadcast_2i64_4i64:
 ; X64-AVX512VL: ## BB#0:
-; X64-AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512VL-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
 ; X64-AVX512VL-NEXT: retq
 ;
 ; X64-AVX512BWVL-LABEL: test_broadcast_2i64_4i64:
 ; X64-AVX512BWVL: ## BB#0:
-; X64-AVX512BWVL-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512BWVL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512BWVL-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
 ; X64-AVX512BWVL-NEXT: retq
 ;
@@ -58,7 +58,7 @@
 define <8 x float> @test_broadcast_4f32_8f32(<4 x float> *%p) nounwind {
 ; X64-AVX512-LABEL: test_broadcast_4f32_8f32:
 ; X64-AVX512: ## BB#0:
-; X64-AVX512-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512-NEXT: vaddps {{.*}}(%rip), %ymm0, %ymm0
 ; X64-AVX512-NEXT: retq
 %1 = load <4 x float>, <4 x float> *%p
@@ -70,7 +70,7 @@
 define <8 x i32> @test_broadcast_4i32_8i32(<4 x i32> *%p) nounwind {
 ; X64-AVX512-LABEL: test_broadcast_4i32_8i32:
 ; X64-AVX512: ## BB#0:
-; X64-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
 ; X64-AVX512-NEXT: retq
 %1 = load <4 x i32>, <4 x i32> *%p
@@ -82,7 +82,7 @@
 define <16 x i16> @test_broadcast_8i16_16i16(<8 x i16> *%p) nounwind {
 ; X64-AVX512-LABEL: test_broadcast_8i16_16i16:
 ; X64-AVX512: ## BB#0:
-; X64-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
 ; X64-AVX512-NEXT: retq
 %1 = load <8 x i16>, <8 x i16> *%p
@@ -94,7 +94,7 @@
 define <32 x i8> @test_broadcast_16i8_32i8(<16 x i8> *%p) nounwind {
 ; X64-AVX512-LABEL: test_broadcast_16i8_32i8:
 ; X64-AVX512: ## BB#0:
-; X64-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
 ; X64-AVX512-NEXT: retq
 %1 = load <16 x i8>, <16 x i8> *%p
@@ -182,7 +182,7 @@
 define <32 x i16> @test_broadcast_8i16_32i16(<8 x i16> *%p) nounwind {
 ; X64-AVX512VL-LABEL: test_broadcast_8i16_32i16:
 ; X64-AVX512VL: ## BB#0:
-; X64-AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} ymm1 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1]
 ; X64-AVX512VL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm0
 ; X64-AVX512VL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm1
 ; X64-AVX512VL-NEXT: retq
@@ -195,7 +195,7 @@
 ;
 ; X64-AVX512DQVL-LABEL: test_broadcast_8i16_32i16:
 ; X64-AVX512DQVL: ## BB#0:
-; X64-AVX512DQVL-NEXT: vbroadcasti32x4 {{.*#+}} ymm1 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1]
 ; X64-AVX512DQVL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm0
 ; X64-AVX512DQVL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm1
 ; X64-AVX512DQVL-NEXT: retq
@@ -208,7 +208,7 @@
 define <64 x i8> @test_broadcast_16i8_64i8(<16 x i8> *%p) nounwind {
 ; X64-AVX512VL-LABEL: test_broadcast_16i8_64i8:
 ; X64-AVX512VL: ## BB#0:
-; X64-AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} ymm1 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1]
 ; X64-AVX512VL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm0
 ; X64-AVX512VL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm1
 ; X64-AVX512VL-NEXT: retq
@@ -221,7 +221,7 @@
 ;
 ; X64-AVX512DQVL-LABEL: test_broadcast_16i8_64i8:
 ; X64-AVX512DQVL: ## BB#0:
-; X64-AVX512DQVL-NEXT: vbroadcasti32x4 {{.*#+}} ymm1 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1]
 ; X64-AVX512DQVL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm0
 ; X64-AVX512DQVL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm1
 ; X64-AVX512DQVL-NEXT: retq
Index: test/CodeGen/X86/avx512dqvl-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512dqvl-intrinsics.ll
+++ test/CodeGen/X86/avx512dqvl-intrinsics.ll
@@ -631,8 +631,7 @@
 ; CHECK-NEXT: ## ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
 ; CHECK-NEXT: vbroadcastf32x2 %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x19,0xd0]
 ; CHECK-NEXT: ## ymm2 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
-; CHECK-NEXT: vbroadcastf32x2 %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x19,0xc0]
-; CHECK-NEXT: ## ymm0 = xmm0[0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xc0]
 ; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca]
 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -654,8 +653,7 @@
 ; CHECK-NEXT: ## ymm1 {%k1} = mem[0,1,0,1,0,1,0,1]
 ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x59,0xd0]
 ; CHECK-NEXT: ## ymm2 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
-; CHECK-NEXT: vbroadcasti32x2 %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x59,0xc0]
-; CHECK-NEXT: ## ymm0 = xmm0[0,1,0,1,0,1,0,1]
+; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc0]
 ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -678,7 +676,7 @@
 ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
 ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x59,0xc8]
 ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x59,0xd0]
-; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x59,0xc0]
+; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xc0]
 ; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca]
 ; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
Index: test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
===================================================================
--- test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
+++ test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
@@ -2106,7 +2106,7 @@
 define <4 x i32> @test_mask_andnot_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_mask_andnot_epi32_rr_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpandnd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdf,0xc1]
+; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdf,0xc1]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
 ret <4 x i32> %res
@@ -2136,7 +2136,7 @@
 define <4 x i32> @test_mask_andnot_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
 ; CHECK-LABEL: test_mask_andnot_epi32_rm_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpandnd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdf,0x07]
+; CHECK-NEXT: vpandn (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdf,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %b = load <4 x i32>, <4 x i32>* %ptr_b
 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
@@ -2210,7 +2210,7 @@
 define <8 x i32> @test_mask_andnot_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK-LABEL: test_mask_andnot_epi32_rr_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpandnd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdf,0xc1]
+; CHECK-NEXT: vpandn %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdf,0xc1]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
 ret <8 x i32> %res
@@ -2240,7 +2240,7 @@
 define <8 x i32> @test_mask_andnot_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
 ; CHECK-LABEL: test_mask_andnot_epi32_rm_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpandnd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdf,0x07]
+; CHECK-NEXT: vpandn (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdf,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %b = load <8 x i32>, <8 x i32>* %ptr_b
 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
@@ -2314,7 +2314,7 @@
 define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_mask_andnot_epi64_rr_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpandnq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0xc1]
+; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdf,0xc1]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
 ret <2 x i64> %res
@@ -2344,7 +2344,7 @@
 define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
 ; CHECK-LABEL: test_mask_andnot_epi64_rm_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpandnq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0x07]
+; CHECK-NEXT: vpandn (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdf,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %b = load <2 x i64>, <2 x i64>* %ptr_b
 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
@@ -2418,7 +2418,7 @@
 define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK-LABEL: test_mask_andnot_epi64_rr_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpandnq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0xc1]
+; CHECK-NEXT: vpandn %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdf,0xc1]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
 ret <4 x i64> %res
@@ -2448,7 +2448,7 @@
 define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
 ; CHECK-LABEL: test_mask_andnot_epi64_rm_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpandnq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0x07]
+; CHECK-NEXT: vpandn (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdf,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %b = load <4 x i64>, <4 x i64>* %ptr_b
 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
Index: test/CodeGen/X86/avx512vl-logic.ll
===================================================================
--- test/CodeGen/X86/avx512vl-logic.ll
+++ test/CodeGen/X86/avx512vl-logic.ll
@@ -21,7 +21,7 @@
 ; CHECK-LABEL: vpandnd256:
 ; CHECK: ## BB#0: ## %entry
 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm1
-; CHECK-NEXT: vpandnd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpandn %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT: retq
 entry:
 ; Force the execution domain with an add.
@@ -74,7 +74,7 @@
 ; CHECK-LABEL: vpandnq256:
 ; CHECK: ## BB#0: ## %entry
 ; CHECK-NEXT: vpaddq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; CHECK-NEXT: vpandnq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vpandn %ymm0, %ymm1, %ymm0
 ; CHECK-NEXT: retq
 entry:
 ; Force the execution domain with an add.
@@ -129,7 +129,7 @@
 ; CHECK-LABEL: vpandnd128:
 ; CHECK: ## BB#0: ## %entry
 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; CHECK-NEXT: vpandnd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vpandn %xmm0, %xmm1, %xmm0
 ; CHECK-NEXT: retq
 entry:
 ; Force the execution domain with an add.
@@ -182,7 +182,7 @@
 ; CHECK-LABEL: vpandnq128:
 ; CHECK: ## BB#0: ## %entry
 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0
-; CHECK-NEXT: vpandnq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vpandn %xmm0, %xmm1, %xmm0
 ; CHECK-NEXT: retq
 entry:
 ; Force the execution domain with an add.
Index: test/CodeGen/X86/evex-to-vex-compress.mir
===================================================================
--- test/CodeGen/X86/evex-to-vex-compress.mir
+++ test/CodeGen/X86/evex-to-vex-compress.mir
@@ -119,6 +119,14 @@
 %ymm0 = VPANDQZ256rm %ymm0, %rip, 1, _, %rax, _
 ; CHECK: %ymm0 = VPANDYrr %ymm0, %ymm1
 %ymm0 = VPANDQZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPANDNYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPANDNDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPANDNYrr %ymm0, %ymm1
+ %ymm0 = VPANDNDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPANDNYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPANDNQZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPANDNYrr %ymm0, %ymm1
+ %ymm0 = VPANDNQZ256rr %ymm0, %ymm1
 ; CHECK: %ymm0 = VPAVGBYrm %ymm0, %rip, 1, _, %rax, _
 %ymm0 = VPAVGBZ256rm %ymm0, %rip, 1, _, %rax, _
 ; CHECK: %ymm0 = VPAVGBYrr %ymm0, %ymm1
@@ -347,13 +355,13 @@
 %ymm0 = VMAXCPSZ256rm %ymm0, %rip, 1, _, %rax, _
 ; CHECK: %ymm0 = VMAXCPSYrr %ymm0, %ymm1
 %ymm0 = VMAXCPSZ256rr %ymm0, %ymm1
- ; CHECK: %ymm0 = VMAXPDYrm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMAXCPDYrm %ymm0, %rip, 1, _, %rax, _
 %ymm0 = VMAXPDZ256rm %ymm0, %rip, 1, _, %rax, _
- ; CHECK: %ymm0 = VMAXPDYrr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VMAXCPDYrr %ymm0, %ymm1
 %ymm0 = VMAXPDZ256rr %ymm0, %ymm1
- ; CHECK: %ymm0 = VMAXPSYrm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMAXCPSYrm %ymm0, %rip, 1, _, %rax, _
 %ymm0 = VMAXPSZ256rm %ymm0, %rip, 1, _, %rax, _
- ; CHECK: %ymm0 = VMAXPSYrr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VMAXCPSYrr %ymm0, %ymm1
 %ymm0 = VMAXPSZ256rr %ymm0, %ymm1
 ; CHECK: %ymm0 = VMINCPDYrm %ymm0, %rip, 1, _, %rax, _
 %ymm0 = VMINCPDZ256rm %ymm0, %rip, 1, _, %rax, _
@@ -363,13 +371,13 @@
 %ymm0 = VMINCPSZ256rm %ymm0, %rip, 1, _, %rax, _
 ; CHECK: %ymm0 = VMINCPSYrr %ymm0, %ymm1
 %ymm0 = VMINCPSZ256rr %ymm0, %ymm1
- ; CHECK: %ymm0 = VMINPDYrm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMINCPDYrm %ymm0, %rip, 1, _, %rax, _
 %ymm0 = VMINPDZ256rm %ymm0, %rip, 1, _, %rax, _
- ; CHECK: %ymm0 = VMINPDYrr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VMINCPDYrr %ymm0, %ymm1
 %ymm0 = VMINPDZ256rr %ymm0, %ymm1
- ; CHECK: %ymm0 = VMINPSYrm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMINCPSYrm %ymm0, %rip, 1, _, %rax, _
 %ymm0 = VMINPSZ256rm %ymm0, %rip, 1, _, %rax, _
- ; CHECK: %ymm0 = VMINPSYrr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VMINCPSYrr %ymm0, %ymm1
 %ymm0 = VMINPSZ256rr %ymm0, %ymm1
 ; CHECK: %ymm0 = VXORPDYrm %ymm0, %rip, 1, _, %rax, _
 %ymm0 = VXORPDZ256rm %ymm0, %rip, 1, _, %rax, _
@@ -687,6 +695,12 @@
 %ymm0 = VPMOVZXWQZ256rm %rip, 1, _, %rax, _
 ; CHECK: %ymm0 = VPMOVZXWQYrr %xmm0
 %ymm0 = VPMOVZXWQZ256rr %xmm0
+ ; CHECK: %ymm0 = VBROADCASTF128 %rip, 1, _, %rax, _
+ %ymm0 = VBROADCASTF32X4Z256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VBROADCASTSDYrm %rip, 1, _, %rax, _
+ %ymm0 = VBROADCASTF32X2Z256m %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VBROADCASTSDYrr %xmm0
+ %ymm0 = VBROADCASTF32X2Z256r %xmm0
 ; CHECK: %ymm0 = VBROADCASTSDYrm %rip, 1, _, %rax, _
 %ymm0 = VBROADCASTSDZ256m %rip, 1, _, %rax, _
 ; CHECK: %ymm0 = VBROADCASTSDYrr %xmm0
 %ymm0 = VBROADCASTSDZ256r %xmm0
@@ -707,6 +721,12 @@
 %ymm0 = VPBROADCASTWZ256m %rip, 1, _, %rax, _
 ; CHECK: %ymm0 = VPBROADCASTWYrr %xmm0
 %ymm0 = VPBROADCASTWZ256r %xmm0
+ ; CHECK: %ymm0 = VBROADCASTI128 %rip, 1, _, %rax, _
+ %ymm0 = VBROADCASTI32X4Z256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPBROADCASTQYrm %rip, 1, _, %rax, _
+ %ymm0 = VBROADCASTI32X2Z256m %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPBROADCASTQYrr %xmm0
+ %ymm0 = VBROADCASTI32X2Z256r %xmm0
 ; CHECK: %ymm0 = VPBROADCASTQYrm %rip, 1, _, %rax, _
 %ymm0 = VPBROADCASTQZ256m %rip, 1, _, %rax, _
 ; CHECK: %ymm0 = VPBROADCASTQYrr %xmm0
@@ -1039,13 +1059,13 @@
 %xmm0 = VMAXCPSZ128rm %xmm0, %rip, 1, _, %rax, _
 ; CHECK: %xmm0 = VMAXCPSrr %xmm0, %xmm1
 %xmm0 = VMAXCPSZ128rr %xmm0, %xmm1
- ; CHECK: %xmm0 = VMAXPDrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMAXCPDrm %xmm0, %rip, 1, _, %rax, _
 %xmm0 = VMAXPDZ128rm %xmm0, %rip, 1, _, %rax, _
- ; CHECK: %xmm0 = VMAXPDrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMAXCPDrr %xmm0, %xmm1
 %xmm0 = VMAXPDZ128rr %xmm0, %xmm1
- ; CHECK: %xmm0 = VMAXPSrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMAXCPSrm %xmm0, %rip, 1, _, %rax, _
 %xmm0 = VMAXPSZ128rm %xmm0, %rip, 1, _, %rax, _
- ; CHECK: %xmm0 = VMAXPSrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMAXCPSrr %xmm0, %xmm1
 %xmm0 = VMAXPSZ128rr %xmm0, %xmm1
 ; CHECK: %xmm0 = VMINCPDrm %xmm0, %rip, 1, _, %rax, _
 %xmm0 = VMINCPDZ128rm %xmm0, %rip, 1, _, %rax, _
@@ -1055,13 +1075,13 @@
 %xmm0 = VMINCPSZ128rm %xmm0, %rip, 1, _, %rax, _
 ; CHECK: %xmm0 = VMINCPSrr %xmm0, %xmm1
 %xmm0 = VMINCPSZ128rr %xmm0, %xmm1
- ; CHECK: %xmm0 = VMINPDrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMINCPDrm %xmm0, %rip, 1, _, %rax, _
 %xmm0 = VMINPDZ128rm %xmm0, %rip, 1, _, %rax, _
- ; CHECK: %xmm0 = VMINPDrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMINCPDrr %xmm0, %xmm1
 %xmm0 = VMINPDZ128rr %xmm0, %xmm1
- ; CHECK: %xmm0 = VMINPSrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMINCPSrm %xmm0, %rip, 1, _, %rax, _
 %xmm0 = VMINPSZ128rm %xmm0, %rip, 1, _, %rax, _
- ; CHECK: %xmm0 = VMINPSrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMINCPSrr %xmm0, %xmm1
 %xmm0 = VMINPSZ128rr %xmm0, %xmm1
 ; CHECK: %xmm0 = VMULPDrm %xmm0, %rip, 1, _, %rax, _
 %xmm0 = VMULPDZ128rm %xmm0, %rip, 1, _, %rax, _
@@ -1119,6 +1139,14 @@
 %xmm0 = VPANDQZ128rm %xmm0, %rip, 1, _, %rax, _
 ; CHECK: %xmm0 = VPANDrr %xmm0, %xmm1
 %xmm0 = VPANDQZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPANDNrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPANDNDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPANDNrr %xmm0, %xmm1
+ %xmm0 = VPANDNDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPANDNrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPANDNQZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPANDNrr %xmm0, %xmm1
+ %xmm0 = VPANDNQZ128rr %xmm0, %xmm1
 ; CHECK: %xmm0 = VPAVGBrm %xmm0, %rip, 1, _, %rax, _
 %xmm0 = VPAVGBZ128rm %xmm0, %rip, 1, _, %rax, _
 ; CHECK: %xmm0 = VPAVGBrr %xmm0, %xmm1
@@ -1707,6 +1735,10 @@
 %xmm0 = VPBROADCASTWZ128m %rip, _, _, _, _
 ; CHECK: %xmm0 = VPBROADCASTWrr %xmm0
 %xmm0 = VPBROADCASTWZ128r %xmm0
+ ; CHECK: %xmm0 = VPBROADCASTQrm %rip, _, _, _, _
+ %xmm0 = VBROADCASTI32X2Z128m %rip, _, _, _, _
+ ; CHECK: %xmm0 = VPBROADCASTQrr %xmm0
+ %xmm0 = VBROADCASTI32X2Z128r %xmm0
 ; CHECK: %xmm0 = VCVTPS2PHrr %xmm0, 2
 %xmm0 = VCVTPS2PHZ128rr %xmm0, 2
 ; CHECK: VCVTPS2PHmr %rdi, %xmm0, 1, _, 0, _, _
@@ -1778,19 +1810,19 @@
 %xmm0 = VMAXCSSZrm %xmm0, %rip, 1, _, %rax, _
 ; CHECK: %xmm0 = VMAXCSSrr %xmm0, %xmm1
 %xmm0 = VMAXCSSZrr %xmm0, %xmm1
- ; CHECK: %xmm0 = VMAXSDrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMAXCSDrm %xmm0, %rip, 1, _, %rax, _
 %xmm0 = VMAXSDZrm %xmm0, %rip, 1, _, %rax, _
 ; CHECK: %xmm0 = VMAXSDrm_Int %xmm0, %rip, 1, _, %rax, _
 %xmm0 = VMAXSDZrm_Int %xmm0, %rip, 1, _, %rax, _
- ; CHECK: %xmm0 = VMAXSDrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMAXCSDrr %xmm0, %xmm1
 %xmm0 = VMAXSDZrr %xmm0, %xmm1
 ; CHECK: %xmm0 = VMAXSDrr_Int %xmm0, %xmm1
 %xmm0 = VMAXSDZrr_Int %xmm0, %xmm1
- ; CHECK: %xmm0 = VMAXSSrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMAXCSSrm %xmm0, %rip, 1, _, %rax, _
 %xmm0 = VMAXSSZrm %xmm0, %rip, 1, _, %rax, _
 ; CHECK: %xmm0 = VMAXSSrm_Int %xmm0, %rip, 1, _, %rax, _
 %xmm0 = VMAXSSZrm_Int %xmm0, %rip, 1, _, %rax, _
- ; CHECK: %xmm0 = VMAXSSrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMAXCSSrr %xmm0, %xmm1
 %xmm0 = VMAXSSZrr %xmm0, %xmm1
 ; CHECK: %xmm0 = VMAXSSrr_Int %xmm0, %xmm1
 %xmm0 = VMAXSSZrr_Int %xmm0, %xmm1
@@ -1802,19 +1834,19 @@
 %xmm0 = VMINCSSZrm %xmm0, %rip, 1, _, %rax, _
 ; CHECK: %xmm0 = VMINCSSrr %xmm0, %xmm1
 %xmm0 = VMINCSSZrr %xmm0, %xmm1
- ; CHECK: %xmm0 = VMINSDrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMINCSDrm %xmm0, %rip, 1, _, %rax, _
 %xmm0 = VMINSDZrm %xmm0, %rip, 1, _, %rax, _
 ; CHECK: %xmm0 = VMINSDrm_Int %xmm0, %rip, 1, _, %rax, _
 %xmm0 = VMINSDZrm_Int %xmm0, %rip, 1, _, %rax, _
- ; CHECK: %xmm0 = VMINSDrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMINCSDrr %xmm0, %xmm1
 %xmm0 = VMINSDZrr %xmm0, %xmm1
 ; CHECK: %xmm0 = VMINSDrr_Int %xmm0, %xmm1
 %xmm0 = VMINSDZrr_Int %xmm0, %xmm1
- ; CHECK: %xmm0 = VMINSSrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMINCSSrm %xmm0, %rip, 1, _, %rax, _
 %xmm0 = VMINSSZrm %xmm0, %rip, 1, _, %rax, _
 ; CHECK: %xmm0 = VMINSSrm_Int %xmm0, %rip, 1, _, %rax, _
 %xmm0 = VMINSSZrm_Int %xmm0, %rip, 1, _, %rax, _
- ; CHECK: %xmm0 = VMINSSrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMINCSSrr %xmm0, %xmm1
 %xmm0 = VMINSSZrr %xmm0, %xmm1
 ; CHECK: %xmm0 = VMINSSrr_Int %xmm0, %xmm1
 %xmm0 = VMINSSZrr_Int %xmm0, %xmm1
@@ -2058,6 +2090,8 @@
 VPEXTRWZmr %rdi, 1, _, 0, _, %xmm0, 3
 ; CHECK: %eax = VPEXTRWri %xmm0, 1
 %eax = VPEXTRWZrr %xmm0, 1
+ ; CHECK: %eax = VPEXTRWrr_REV %xmm0, 1
+ %eax = VPEXTRWZrr_REV %xmm0, 1
 ; CHECK: %xmm0 = VPINSRBrm %xmm0, %rsi, 1, _, 0, _, 3
 %xmm0 = VPINSRBZrm %xmm0, %rsi, 1, _, 0, _, 3
 ; CHECK: %xmm0 = VPINSRBrr %xmm0, %edi, 5
@@ -2090,12 +2124,8 @@
 %xmm0 = VSQRTSSZr %xmm0, _
 ; CHECK: %xmm0 = VSQRTSSr_Int %xmm0, _
 %xmm0 = VSQRTSSZr_Int %xmm0, _
- ; CHECK: %rdi = VCVTSD2SI64rm %rdi, %xmm0, 1, _, 0
- %rdi = VCVTSD2SI64Zrm %rdi, %xmm0, 1, _, 0
 ; CHECK: %rdi = VCVTSD2SI64rr %xmm0
 %rdi = VCVTSD2SI64Zrr %xmm0
- ; CHECK: %edi = VCVTSD2SIrm %rdi, %xmm0, 1, _, 0
- %edi = VCVTSD2SIZrm %rdi, %xmm0, 1, _, 0
 ; CHECK: %edi = VCVTSD2SIrr %xmm0
 %edi = VCVTSD2SIZrr %xmm0
 ; CHECK: %xmm0 = VCVTSD2SSrm %xmm0, %rdi, 1, _, 0, _
@@ -2118,16 +2148,28 @@
 %xmm0 = VCVTSI2SSZrr %xmm0, _
 ; CHECK: %xmm0 = Int_VCVTSI2SSrr %xmm0, _
 %xmm0 = VCVTSI2SSZrr_Int %xmm0, _
+ ; CHECK: %xmm0 = VCVTSI2SD64rm %xmm0, %rdi, 1, _, 0, _
+ %xmm0 = VCVTSI642SDZrm %xmm0, %rdi, 1, _, 0, _
+ ; CHECK: %xmm0 = Int_VCVTSI2SD64rm %xmm0, %rdi, 1, _, 0, _
+ %xmm0 = VCVTSI642SDZrm_Int %xmm0, %rdi, 1, _, 0, _
+ ; CHECK: %xmm0 = VCVTSI2SD64rr %xmm0, _
+ %xmm0 = VCVTSI642SDZrr %xmm0, _
+ ; CHECK: %xmm0 = Int_VCVTSI2SD64rr %xmm0, _
+ %xmm0 = VCVTSI642SDZrr_Int %xmm0, _
+ ; CHECK: %xmm0 = VCVTSI2SS64rm %xmm0, %rdi, 1, _, 0, _
+ %xmm0 = VCVTSI642SSZrm %xmm0, %rdi, 1, _, 0, _
+ ; CHECK: %xmm0 = Int_VCVTSI2SS64rm %xmm0, %rdi, 1, _, 0, _
+ %xmm0 = VCVTSI642SSZrm_Int %xmm0, %rdi, 1, _, 0, _
+ ; CHECK: %xmm0 = VCVTSI2SS64rr %xmm0, _
+ %xmm0 = VCVTSI642SSZrr %xmm0, _
+ ; CHECK: %xmm0 = Int_VCVTSI2SS64rr %xmm0, _
+ %xmm0 = VCVTSI642SSZrr_Int %xmm0, _
 ; CHECK: %xmm0 = VCVTSS2SDrm %xmm0, %rdi, 1, _, 0, _
 %xmm0 = VCVTSS2SDZrm %xmm0, %rdi, 1, _, 0, _
 ; CHECK: %xmm0 = VCVTSS2SDrr %xmm0, _
 %xmm0 = VCVTSS2SDZrr %xmm0, _
- ; CHECK: %rdi = VCVTSS2SI64rm %rdi, %xmm0, 1, _, 0
- %rdi = VCVTSS2SI64Zrm %rdi, %xmm0, 1, _, 0
 ; CHECK: %rdi = VCVTSS2SI64rr %xmm0
 %rdi = VCVTSS2SI64Zrr %xmm0
- ; CHECK: %edi = VCVTSS2SIrm %rdi, %xmm0, 1, _, 0
- %edi = VCVTSS2SIZrm %rdi, %xmm0, 1, _, 0
 ; CHECK: %edi = VCVTSS2SIrr %xmm0
 %edi = VCVTSS2SIZrr %xmm0
 ; CHECK: %rdi = VCVTTSD2SI64rm %rdi, %xmm0, 1, _, 0
@@ -2174,6 +2216,12 @@
 %xmm0 = VMOVSDZrm %rip, _, _, _, _
 ; CHECK: %xmm0 = VMOVSDrr %xmm0, _
 %xmm0 = VMOVSDZrr %xmm0, _
+ ; CHECK: %xmm0 = VMOVSDrr_REV %xmm0, _
+ %xmm0 = VMOVSDZrr_REV %xmm0, _
+ ; CHECK: %rax = VMOVSDto64rr %xmm0
+ %rax = VMOVSDto64Zrr %xmm0
+ ; CHECK: VMOVSDto64mr %rdi, %xmm0, _, _, _, _
+ VMOVSDto64Zmr %rdi, %xmm0, _, _, _, _
 ; CHECK: VMOVSSmr %rdi, %xmm0, _, _, _, _
 VMOVSSZmr %rdi, %xmm0, _, _, _, _
 ; CHECK: %xmm0 = VMOVSSrm %rip, _, _, _, _
@@ -2182,8 +2230,14 @@
 %xmm0 = VMOVSSZrr %xmm0, _
 ; CHECK: %xmm0 = VMOVSSrr_REV %xmm0, _
 %xmm0 = VMOVSSZrr_REV %xmm0, _
+ ; CHECK: VMOVSS2DImr %rdi, %xmm0, _, _, _, _
+ VMOVSS2DIZmr %rdi, %xmm0, _, _, _, _
+ ; CHECK: %eax = VMOVSS2DIrr %xmm0
+ %eax = VMOVSS2DIZrr %xmm0
 ; CHECK: %xmm0 = VMOV64toPQIrr %rdi
 %xmm0 = VMOV64toPQIZrr %rdi
+ ; CHECK: %xmm0 = VMOV64toPQIrm %rdi, _, _, _, _
+ %xmm0 = VMOV64toPQIZrm %rdi, _, _, _, _
 ; CHECK: %xmm0 = VMOV64toSDrr %rdi
 %xmm0 = VMOV64toSDZrr %rdi
 ; CHECK: %xmm0 = VMOVDI2PDIrm %rip, _, _, _, _
@@ -2197,11 +2251,15 @@
 ; CHECK: VMOVPDI2DImr %rdi, %xmm0, _, _, _, _
 VMOVPDI2DIZmr %rdi, %xmm0, _, _, _, _
 ; CHECK: %edi = VMOVPDI2DIrr %xmm0
- %edi = VMOVPDI2DIZrr %xmm0
+ %edi = VMOVPDI2DIZrr %xmm0
+ ; CHECK: %xmm0 = VMOVPQI2QIrr %xmm0
+ %xmm0 = VMOVPQI2QIZrr %xmm0
 ; CHECK: VMOVPQI2QImr %rdi, %xmm0, _, _, _, _
 VMOVPQI2QIZmr %rdi, %xmm0, _, _, _, _
 ; CHECK: %rdi = VMOVPQIto64rr %xmm0
 %rdi = VMOVPQIto64Zrr %xmm0
+ ; CHECK: VMOVPQIto64mr %rdi, %xmm0, _, _, _, _
+ VMOVPQIto64Zmr %rdi, %xmm0, _, _, _, _
 ; CHECK: %xmm0 = VMOVQI2PQIrm %rip, _, _, _, _
 %xmm0 = VMOVQI2PQIZrm %rip, _, _, _, _
 ; CHECK: %xmm0 = VMOVZPQILo2PQIrr %xmm0
@@ -2238,6 +2296,14 @@
 VUCOMISSZrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags
 ; CHECK: VUCOMISSrr %xmm0, %xmm1, implicit-def %eflags
 VUCOMISSZrr %xmm0, %xmm1, implicit-def %eflags
+ ; CHECK: VEXTRACTPSmr %rdi, 1, _, 0, _, %xmm0, _
+ VEXTRACTPSZmr %rdi, 1, _, 0, _, %xmm0, _
+ ; CHECK: %eax = VEXTRACTPSrr %xmm0, _
+ %eax = VEXTRACTPSZrr %xmm0, _
+ ; CHECK: %xmm0 = VINSERTPSrm %xmm0, %rdi, _, _, _, _, _
+ %xmm0 = VINSERTPSZrm %xmm0, %rdi, _, _, _, _, _
+ ; CHECK: %xmm0 = VINSERTPSrr %xmm0, %xmm0, _
+ %xmm0 = VINSERTPSZrr %xmm0, %xmm0, _
 RET 0, %zmm0, %zmm1
...
@@ -2350,6 +2416,14 @@
 %ymm16 = VPANDQZ256rm %ymm16, %rip, 1, _, %rax, _
 ; CHECK: %ymm16 = VPANDQZ256rr %ymm16, %ymm1
 %ymm16 = VPANDQZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPANDNDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPANDNDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPANDNDZ256rr %ymm16, %ymm1
+ %ymm16 = VPANDNDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPANDNQZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPANDNQZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPANDNQZ256rr %ymm16, %ymm1
+ %ymm16 = VPANDNQZ256rr %ymm16, %ymm1
 ; CHECK: %ymm16 = VPAVGBZ256rm %ymm16, %rip, 1, _, %rax, _
 %ymm16 = VPAVGBZ256rm %ymm16, %rip, 1, _, %rax, _
 ; CHECK: %ymm16 = VPAVGBZ256rr %ymm16, %ymm1
@@ -2918,6 +2992,12 @@
 %ymm16 = VPMOVZXWQZ256rm %rip, 1, _, %rax, _
 ; CHECK: %ymm16 = VPMOVZXWQZ256rr %xmm0
 %ymm16 = VPMOVZXWQZ256rr %xmm0
+ ; CHECK: %ymm16 = VBROADCASTF32X2Z256m %rip, 1, _, %rax, _
+ %ymm16 = VBROADCASTF32X2Z256m %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VBROADCASTF32X2Z256r %xmm16
+ %ymm16 = VBROADCASTF32X2Z256r %xmm16
+ ; CHECK: %ymm16 = VBROADCASTF32X4Z256rm %rip, 1, _, %rax, _
+ %ymm16 = VBROADCASTF32X4Z256rm %rip, 1, _, %rax, _
 ; CHECK: %ymm16 = VBROADCASTSDZ256m %rip, 1, _, %rax, _
 %ymm16 = VBROADCASTSDZ256m %rip, 1, _, %rax, _
 ; CHECK: %ymm16 = VBROADCASTSDZ256r %xmm0
@@ -2938,6 +3018,12 @@
 %ymm16 = VPBROADCASTWZ256m %rip, 1, _, %rax, _
 ; CHECK: %ymm16 = VPBROADCASTWZ256r %xmm0
 %ymm16 = VPBROADCASTWZ256r %xmm0
+ ; CHECK: %ymm16 = VBROADCASTI32X4Z256rm %rip, 1, _, %rax, _
+ %ymm16 = VBROADCASTI32X4Z256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VBROADCASTI32X2Z256m %rip, 1, _, %rax, _
+ %ymm16 = VBROADCASTI32X2Z256m %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VBROADCASTI32X2Z256r %xmm16
+ %ymm16 = VBROADCASTI32X2Z256r %xmm16
 ; CHECK: %ymm16 = VPBROADCASTQZ256m %rip, 1, _, %rax, _
 %ymm16 = VPBROADCASTQZ256m %rip, 1, _, %rax, _
 ; CHECK: %ymm16 = VPBROADCASTQZ256r %xmm0
 %ymm16 = VPBROADCASTQZ256r %xmm0
@@ -3350,6 +3436,14 @@
 %xmm16 = VPANDQZ128rm %xmm16, %rip, 1, _, %rax, _
 ; CHECK: %xmm16 = VPANDQZ128rr %xmm16, %xmm1
 %xmm16 = VPANDQZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPANDNDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPANDNDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPANDNDZ128rr %xmm16, %xmm1
+ %xmm16 = VPANDNDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPANDNQZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPANDNQZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPANDNQZ128rr %xmm16, %xmm1
+ %xmm16 = VPANDNQZ128rr %xmm16, %xmm1
 ; CHECK: %xmm16 = VPAVGBZ128rm %xmm16, %rip, 1, _, %rax, _
 %xmm16 = VPAVGBZ128rm %xmm16, %rip, 1, _, %rax, _
 ; CHECK: %xmm16 = VPAVGBZ128rr %xmm16, %xmm1
@@ -3938,6 +4032,10 @@
 %xmm16 = VPBROADCASTWZ128m %rip, _, _, _, _
 ; CHECK: %xmm16 = VPBROADCASTWZ128r %xmm16
 %xmm16 = VPBROADCASTWZ128r %xmm16
+ ; CHECK: %xmm16 = VBROADCASTI32X2Z128m %rip, _, _, _, _
+ %xmm16 = VBROADCASTI32X2Z128m %rip, _, _, _, _
+ ; CHECK: %xmm16 = VBROADCASTI32X2Z128r %xmm0
+ %xmm16 = VBROADCASTI32X2Z128r %xmm0
 ; CHECK: %xmm16 = VCVTPS2PHZ128rr %xmm16, 2
 %xmm16 = VCVTPS2PHZ128rr %xmm16, 2
 ; CHECK: VCVTPS2PHZ128mr %rdi, %xmm16, 1, _, 0, _, _
@@ -3958,6 +4056,14 @@
 %xmm16 = VPALIGNRZ128rmi %xmm16, _, _, _, _, _, _
 ; CHECK: %xmm16 = VPALIGNRZ128rri %xmm16, %xmm1, 15
 %xmm16 = VPALIGNRZ128rri %xmm16, %xmm1, 15
+ ; CHECK: VEXTRACTPSZmr %rdi, 1, _, 0, _, %xmm16, _
+ VEXTRACTPSZmr %rdi, 1, _, 0, _, %xmm16, _
+ ; CHECK: %eax = VEXTRACTPSZrr %xmm16, _
+ %eax = VEXTRACTPSZrr %xmm16, _
+ ; CHECK: %xmm16 = VINSERTPSZrm %xmm16, %rdi, _, _, _, _, _
+ %xmm16 = VINSERTPSZrm %xmm16, %rdi, _, _, _, _, _
+ ; CHECK: %xmm16 = VINSERTPSZrr %xmm16, %xmm16, _
+ %xmm16 = VINSERTPSZrr %xmm16, %xmm16, _
 RET 0, %zmm0, %zmm1
...
@@ -4288,6 +4394,8 @@
 VPEXTRWZmr %rdi, 1, _, 0, _, %xmm16, 3
 ; CHECK: %eax = VPEXTRWZrr %xmm16, 1
 %eax = VPEXTRWZrr %xmm16, 1
+ ; CHECK: %eax = VPEXTRWZrr_REV %xmm16, 1
+ %eax = VPEXTRWZrr_REV %xmm16, 1
 ; CHECK: %xmm16 = VPINSRBZrm %xmm16, %rsi, 1, _, 0, _, 3
 %xmm16 = VPINSRBZrm %xmm16, %rsi, 1, _, 0, _, 3
 ; CHECK: %xmm16 = VPINSRBZrr %xmm16, %edi, 5
@@ -4348,6 +4456,22 @@
 %xmm16 = VCVTSI2SSZrr %xmm16, _
 ; CHECK: %xmm16 = VCVTSI2SSZrr_Int %xmm16, _
 %xmm16 = VCVTSI2SSZrr_Int %xmm16, _
+ ; CHECK: %xmm16 = VCVTSI642SDZrm %xmm16, %rdi, 1, _, 0, _
+ %xmm16 = VCVTSI642SDZrm %xmm16, %rdi, 1, _, 0, _
+ ; CHECK: %xmm16 = VCVTSI642SDZrm_Int %xmm16, %rdi, 1, _, 0, _
+ %xmm16 = VCVTSI642SDZrm_Int %xmm16, %rdi, 1, _, 0, _
+ ; CHECK: %xmm16 = VCVTSI642SDZrr %xmm16, _
+ %xmm16 = VCVTSI642SDZrr %xmm16, _
+ ; CHECK: %xmm16 = VCVTSI642SDZrr_Int %xmm16, _
+ %xmm16 = VCVTSI642SDZrr_Int %xmm16, _
+ ; CHECK: %xmm16 = VCVTSI642SSZrm %xmm16, %rdi, 1, _, 0, _
+ %xmm16 = VCVTSI642SSZrm %xmm16, %rdi, 1, _, 0, _
+ ; CHECK: %xmm16 = VCVTSI642SSZrm_Int %xmm16, %rdi, 1, _, 0, _
+ %xmm16 = VCVTSI642SSZrm_Int %xmm16, %rdi, 1, _, 0, _
+ ; CHECK: %xmm16 = VCVTSI642SSZrr %xmm16, _
+ %xmm16 = VCVTSI642SSZrr %xmm16, _
+ ; CHECK: %xmm16 = VCVTSI642SSZrr_Int %xmm16, _
+ %xmm16 = VCVTSI642SSZrr_Int %xmm16, _
 ; CHECK: %xmm16 = VCVTSS2SDZrm %xmm16, %rdi, 1, _, 0, _
 %xmm16 = VCVTSS2SDZrm %xmm16, %rdi, 1, _, 0, _
 ; CHECK: %xmm16 = VCVTSS2SDZrr %xmm16, _
@@ -4404,6 +4528,12 @@
 %xmm16 = VMOVSDZrm %rip, _, _, _, _
 ; CHECK: %xmm16 = VMOVSDZrr %xmm16, _
 %xmm16 = VMOVSDZrr %xmm16, _
+ ; CHECK: %xmm16 = VMOVSDZrr_REV %xmm16, _
+ %xmm16 = VMOVSDZrr_REV %xmm16, _
+ ; CHECK: %rax = VMOVSDto64Zrr %xmm16
+ %rax = VMOVSDto64Zrr %xmm16
+ ; CHECK: VMOVSDto64Zmr %rdi, %xmm16, _, _, _, _
+ VMOVSDto64Zmr %rdi, %xmm16, _, _, _, _
 ; CHECK: VMOVSSZmr %rdi, %xmm16, _, _, _, _
 VMOVSSZmr %rdi, %xmm16, _, _, _, _
 ; CHECK: %xmm16 = VMOVSSZrm %rip, _, _, _, _
@@ -4412,8 +4542,14 @@
 %xmm16 = VMOVSSZrr %xmm16, _
 ; CHECK: %xmm16 = VMOVSSZrr_REV %xmm16, _
 %xmm16 = VMOVSSZrr_REV %xmm16, _
+ ; CHECK: VMOVSS2DIZmr %rdi, %xmm16, _, _, _, _
+ VMOVSS2DIZmr %rdi, %xmm16, _, _, _, _
+ ; CHECK: %eax = VMOVSS2DIZrr %xmm16
+ %eax = VMOVSS2DIZrr %xmm16
 ; CHECK: %xmm16 = VMOV64toPQIZrr %rdi
 %xmm16 = VMOV64toPQIZrr %rdi
+ ; CHECK: %xmm16 = VMOV64toPQIZrm %rdi, _, _, _, _
+ %xmm16 = VMOV64toPQIZrm %rdi, _, _, _, _
 ; CHECK: %xmm16 = VMOV64toSDZrr %rdi
 %xmm16 = VMOV64toSDZrr %rdi
 ; CHECK: %xmm16 = VMOVDI2PDIZrm %rip, _, _, _, _
@@ -4428,10 +4564,14 @@
 VMOVPDI2DIZmr %rdi, %xmm16, _, _, _, _
 ; CHECK: %edi = VMOVPDI2DIZrr %xmm16
 %edi = VMOVPDI2DIZrr %xmm16
+ ; CHECK: %xmm16 = VMOVPQI2QIZrr %xmm16
+ %xmm16 = VMOVPQI2QIZrr %xmm16
 ; CHECK: VMOVPQI2QIZmr %rdi, %xmm16, _, _, _, _
 VMOVPQI2QIZmr %rdi, %xmm16, _, _, _, _
 ; CHECK: %rdi = VMOVPQIto64Zrr %xmm16
 %rdi = VMOVPQIto64Zrr %xmm16
+ ; CHECK: VMOVPQIto64Zmr %rdi, %xmm16, _, _, _, _
+ VMOVPQIto64Zmr %rdi, %xmm16, _, _, _, _
 ; CHECK: %xmm16 = VMOVQI2PQIZrm %rip, _, _, _, _
 %xmm16 = VMOVQI2PQIZrm %rip, _, _, _, _
 ; CHECK: %xmm16 = VMOVZPQILo2PQIZrr %xmm16
Index: test/CodeGen/X86/sse-intrinsics-x86_64.ll
===================================================================
--- test/CodeGen/X86/sse-intrinsics-x86_64.ll
+++ test/CodeGen/X86/sse-intrinsics-x86_64.ll
@@ -45,7 +45,7 @@
 ;
 ; SKX-LABEL: test_x86_sse_cvtsi642ss:
 ; SKX: ## BB#0:
-; SKX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x2a,0xc7]
+; SKX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe1,0xfa,0x2a,0xc7]
 ; SKX-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
 ret <4 x float> %res
Index: test/CodeGen/X86/sse2-intrinsics-x86_64.ll
===================================================================
--- test/CodeGen/X86/sse2-intrinsics-x86_64.ll
+++ test/CodeGen/X86/sse2-intrinsics-x86_64.ll
@@ -45,7 +45,7 @@
 ;
 ; SKX-LABEL: test_x86_sse2_cvtsi642sd:
 ; SKX: ## BB#0:
-; SKX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x2a,0xc7]
+; SKX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe1,0xfb,0x2a,0xc7]
 ; SKX-NEXT: retq ## encoding: [0xc3]
 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
 ret <2 x double> %res
Index: test/CodeGen/X86/sse41-intrinsics-x86.ll
===================================================================
--- test/CodeGen/X86/sse41-intrinsics-x86.ll
+++ test/CodeGen/X86/sse41-intrinsics-x86.ll
@@ -88,7 +88,7 @@
 ;
 ; SKX-LABEL: test_x86_sse41_insertps:
 ; SKX: ## BB#0:
-; SKX-NEXT: vinsertps $17, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x21,0xc1,0x11]
+; SKX-NEXT: vinsertps $17, %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x11]
 ; SKX-NEXT: ## xmm0 = zero,xmm1[0],xmm0[2,3]
 ; SKX-NEXT: retl ## encoding: [0xc3]
 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 17) ; <<4 x float>> [#uses=1]
Index: test/CodeGen/X86/subvector-broadcast.ll
===================================================================
--- test/CodeGen/X86/subvector-broadcast.ll
+++ test/CodeGen/X86/subvector-broadcast.ll
@@ -24,13 +24,13 @@
 ; X32-AVX512F-LABEL: test_broadcast_2f64_4f64:
 ; X32-AVX512F: ## BB#0:
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X32-AVX512F-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X32-AVX512F-NEXT: retl
 ;
 ; X32-AVX512BW-LABEL: test_broadcast_2f64_4f64:
 ; X32-AVX512BW: ## BB#0:
 ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512BW-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X32-AVX512BW-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X32-AVX512BW-NEXT: retl
 ;
 ; X32-AVX512DQ-LABEL: test_broadcast_2f64_4f64:
@@ -46,12 +46,12 @@
 ;
 ; X64-AVX512F-LABEL: test_broadcast_2f64_4f64:
 ; X64-AVX512F: ## BB#0:
-; X64-AVX512F-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512F-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512F-NEXT: retq
 ;
 ; X64-AVX512BW-LABEL: test_broadcast_2f64_4f64:
 ; X64-AVX512BW: ## BB#0:
-; X64-AVX512BW-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512BW-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512BW-NEXT: retq
 ;
 ; X64-AVX512DQ-LABEL: test_broadcast_2f64_4f64:
@@ -153,13 +153,13 @@
 ; X32-AVX512F-LABEL: test_broadcast_2i64_4i64:
 ; X32-AVX512F: ## BB#0:
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X32-AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X32-AVX512F-NEXT: retl
 ;
 ; X32-AVX512BW-LABEL: test_broadcast_2i64_4i64:
 ; X32-AVX512BW: ## BB#0:
 ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X32-AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X32-AVX512BW-NEXT: retl
 ;
 ; X32-AVX512DQ-LABEL: test_broadcast_2i64_4i64:
@@ -175,12 +175,12 @@
 ;
 ; X64-AVX512F-LABEL: test_broadcast_2i64_4i64:
 ; X64-AVX512F: ## BB#0:
-; X64-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512F-NEXT: retq
 ;
 ; X64-AVX512BW-LABEL: test_broadcast_2i64_4i64:
 ; X64-AVX512BW: ## BB#0:
-; X64-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512BW-NEXT: retq
 ;
 ; X64-AVX512DQ-LABEL: test_broadcast_2i64_4i64:
@@ -286,27 +286,16 @@
 }
 define <8 x float> @test_broadcast_4f32_8f32(<4 x float> *%p) nounwind {
-; X32-AVX-LABEL: test_broadcast_4f32_8f32:
-; X32-AVX: ## BB#0:
-; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X32-AVX-NEXT: retl
-;
-; X32-AVX512-LABEL: test_broadcast_4f32_8f32:
-; X32-AVX512: ## BB#0:
-; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
-; X32-AVX512-NEXT: retl
-;
-; X64-AVX-LABEL: test_broadcast_4f32_8f32:
-; X64-AVX: ## BB#0:
-; X64-AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X64-AVX-NEXT: retq
+; X32-LABEL: test_broadcast_4f32_8f32:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
+; X32-NEXT: retl
 ;
-; X64-AVX512-LABEL: test_broadcast_4f32_8f32:
-; X64-AVX512: ## BB#0:
-; X64-AVX512-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
-; X64-AVX512-NEXT: retq
+; X64-LABEL: test_broadcast_4f32_8f32:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
+; X64-NEXT: retq
 %1 = load <4 x float>, <4 x float> *%p
 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> 
 ret <8 x float> %2
@@ -402,7 +391,7 @@
 ; X32-AVX512-LABEL: test_broadcast_4i32_8i32:
 ; X32-AVX512: ## BB#0:
 ; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X32-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X32-AVX512-NEXT: retl
 ;
 ; X64-AVX-LABEL: test_broadcast_4i32_8i32:
@@ -412,7 +401,7 @@
 ;
 ; X64-AVX512-LABEL: test_broadcast_4i32_8i32:
 ; X64-AVX512: ## BB#0:
-; X64-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512-NEXT: retq
 %1 = load <4 x i32>, <4 x i32> *%p
 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> 
@@ -522,7 +511,7 @@
 ; X32-AVX512-LABEL: test_broadcast_8i16_16i16:
 ; X32-AVX512: ## BB#0:
 ; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X32-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X32-AVX512-NEXT: retl
 ;
 ; X64-AVX-LABEL: test_broadcast_8i16_16i16:
@@ -532,7 +521,7 @@
 ;
 ; X64-AVX512-LABEL: test_broadcast_8i16_16i16:
 ; X64-AVX512: ## BB#0:
-; X64-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512-NEXT: retq
 %1 = load <8 x i16>, <8 x i16> *%p
 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <16 x i32> 
@@ -557,7 +546,7 @@
 ; X32-AVX512F-LABEL: test_broadcast_8i16_32i16:
 ; X32-AVX512F: ## BB#0:
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X32-AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X32-AVX512F-NEXT: vmovdqa %ymm0, %ymm1
 ; X32-AVX512F-NEXT: retl
 ;
@@ -570,7 +559,7 @@
 ; X32-AVX512DQ-LABEL: test_broadcast_8i16_32i16:
 ; X32-AVX512DQ: ## BB#0:
 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X32-AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X32-AVX512DQ-NEXT: vmovdqa %ymm0, %ymm1
 ; X32-AVX512DQ-NEXT: retl
 ;
@@ -588,7 +577,7 @@
 ;
 ; X64-AVX512F-LABEL: test_broadcast_8i16_32i16:
 ; X64-AVX512F: ## BB#0:
-; X64-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512F-NEXT: vmovdqa %ymm0, %ymm1
 ; X64-AVX512F-NEXT: retq
 ;
@@ -599,7 +588,7 @@
 ;
 ; X64-AVX512DQ-LABEL: test_broadcast_8i16_32i16:
 ; X64-AVX512DQ: ## BB#0:
-; X64-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512DQ-NEXT: vmovdqa %ymm0, %ymm1
 ; X64-AVX512DQ-NEXT: retq
 %1 = load <8 x i16>, <8 x i16> *%p
@@ -672,7 +661,7 @@
 ; X32-AVX512-LABEL: test_broadcast_16i8_32i8:
 ; X32-AVX512: ## BB#0:
 ; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X32-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X32-AVX512-NEXT: retl
 ;
 ; X64-AVX-LABEL: test_broadcast_16i8_32i8:
@@ -682,7 +671,7 @@
 ;
 ; X64-AVX512-LABEL: test_broadcast_16i8_32i8:
 ; X64-AVX512: ## BB#0:
-; X64-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512-NEXT: retq
 %1 = load <16 x i8>, <16 x i8> *%p
 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32> 
@@ -707,7 +696,7 @@
 ; X32-AVX512F-LABEL: test_broadcast_16i8_64i8:
 ; X32-AVX512F: ## BB#0:
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X32-AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X32-AVX512F-NEXT: vmovdqa %ymm0, %ymm1
 ; X32-AVX512F-NEXT: retl
 ;
@@ -720,7 +709,7 @@
 ; X32-AVX512DQ-LABEL: test_broadcast_16i8_64i8:
 ; X32-AVX512DQ: ## BB#0:
 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X32-AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X32-AVX512DQ-NEXT: vmovdqa %ymm0, %ymm1
 ; X32-AVX512DQ-NEXT: retl
 ;
@@ -738,7 +727,7 @@
 ;
 ; X64-AVX512F-LABEL: test_broadcast_16i8_64i8:
 ; X64-AVX512F: ## BB#0:
-; X64-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512F-NEXT: vmovdqa %ymm0, %ymm1
 ; X64-AVX512F-NEXT: retq
 ;
@@ -749,7 +738,7 @@
 ;
 ; X64-AVX512DQ-LABEL: test_broadcast_16i8_64i8:
 ; X64-AVX512DQ: ## BB#0:
-; X64-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; X64-AVX512DQ-NEXT: vmovdqa %ymm0, %ymm1
 ; X64-AVX512DQ-NEXT: retq
 %1 = load <16 x i8>, <16 x i8> *%p
Index: test/CodeGen/X86/vector-shuffle-256-v4.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -1376,7 +1376,7 @@
 ;
 ; AVX512VL-LABEL: splat128_mem_v4i64_from_v2i64:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
+; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
 ; AVX512VL-NEXT: retq
 %v = load <2 x i64>, <2 x i64>* %ptr
 %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> 
@@ -1384,20 +1384,10 @@
 }
 define <4 x double> @splat128_mem_v4f64_from_v2f64(<2 x double>* %ptr) {
-; AVX1-LABEL: splat128_mem_v4f64_from_v2f64:
-; AVX1: # BB#0:
-; AVX1-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splat128_mem_v4f64_from_v2f64:
-; AVX2: # BB#0:
-; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: splat128_mem_v4f64_from_v2f64:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: splat128_mem_v4f64_from_v2f64:
+; ALL: # BB#0:
+; ALL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
+; ALL-NEXT: retq
 %v = load <2 x double>, <2 x double>* %ptr
 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> 
 ret <4 x double> %shuffle
Index: test/CodeGen/X86/vselect-pcmp.ll
===================================================================
--- test/CodeGen/X86/vselect-pcmp.ll
+++ test/CodeGen/X86/vselect-pcmp.ll
@@ -31,23 +31,14 @@
 ; Sorry 16-bit, you're not important enough to support?
 define <8 x i16> @signbit_sel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) {
-; AVX12F-LABEL: signbit_sel_v8i16:
-; AVX12F: # BB#0:
-; AVX12F-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX12F-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
-; AVX12F-NEXT: vpandn %xmm1, %xmm2, %xmm1
-; AVX12F-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX12F-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX12F-NEXT: retq
-;
-; AVX512VL-LABEL: signbit_sel_v8i16:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX512VL-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
-; AVX512VL-NEXT: vpandnq %xmm1, %xmm2, %xmm1
-; AVX512VL-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
+; AVX-LABEL: signbit_sel_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vpandn %xmm1, %xmm2, %xmm1
+; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
 %tr = icmp slt <8 x i16> %mask, zeroinitializer
 %z = select <8 x i1> %tr, <8 x i16> %x, <8 x i16> %y
 ret <8 x i16> %z
@@ -176,23 +167,14 @@
 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
 ;
-; AVX512F-LABEL: signbit_sel_v16i16:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vpxor %ymm3, %ymm3, %ymm3
-; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
-; AVX512F-NEXT: vpandn %ymm1, %ymm2, %ymm1
-; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
-; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: signbit_sel_v16i16:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxor %ymm3, %ymm3, %ymm3
-; AVX512VL-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
-; AVX512VL-NEXT: vpandnq %ymm1, %ymm2, %ymm1
-; AVX512VL-NEXT: vpand %ymm2, %ymm0, %ymm0
-; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: signbit_sel_v16i16:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX512-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
+; AVX512-NEXT: vpandn %ymm1, %ymm2, %ymm1
+; AVX512-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
 %tr = icmp slt <16 x i16> %mask, zeroinitializer
 %z = select <16 x i1> %tr, <16 x i16> %x, <16 x i16> %y
 ret <16 x i16> %z