Index: lib/Target/X86/X86MCInstLower.cpp
===================================================================
--- lib/Target/X86/X86MCInstLower.cpp
+++ lib/Target/X86/X86MCInstLower.cpp
@@ -1447,6 +1447,65 @@
   }
 }
 
+static void printConstantPoolOp(MCStreamer &OutStreamer,
+                                const MachineInstr *MI,
+                                StringRef Op, unsigned EltSize,
+                                bool EOL) {
+  if (!OutStreamer.isVerboseAsm())
+    return;
+
+  const MCInstrDesc &Desc = MI->getDesc();
+  int MemOperand = X86II::getOperandBias(Desc) +
+                   X86II::getMemoryOperandNo(Desc.TSFlags);
+  const MachineOperand &MemOp = MI->getOperand(MemOperand + 3);
+
+  auto *C = getConstantFromPool(*MI, MemOp);
+  if (!C)
+    return;
+
+  SmallVector<uint64_t, 64> Vec;
+  APInt UndefElts;
+  if (!extractConstantMask(C, EltSize, UndefElts, Vec))
+    return;
+
+  const MachineOperand &DstOp = MI->getOperand(0);
+  const MachineOperand &SrcOp = MI->getOperand(Desc.getNumDefs());
+
+  std::string Comment;
+  raw_string_ostream CS(Comment);
+
+  CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg());
+  CS << " = ";
+  CS << X86ATTInstPrinter::getRegisterName(SrcOp.getReg());
+  CS << Op << "[";
+
+  bool IsSplat = true;
+  for (unsigned i = 1; i < Vec.size(); ++i) {
+    if (Vec[0] != Vec[i]) {
+      IsSplat = false;
+      break;
+    }
+  }
+
+  for (unsigned i = 0; i < Vec.size(); ++i) {
+    if (i != 0)
+      CS << ",";
+    if (UndefElts[i])
+      CS << 'u';
+    else
+      CS << format_hex(Vec[i], EltSize/4 + 2);
+
+    if (IsSplat) {
+      CS << " splat";
+      break;
+    }
+  }
+
+  CS << "]";
+
+  OutStreamer.AddComment(CS.str(), EOL);
+}
+
 void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   X86MCInstLower MCInstLowering(*MF, *this);
   const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo();
@@ -1840,6 +1899,145 @@
     break;
   }
 
+  // TODO: Decode more instructions, e.g. mul, shift, compare.
+  case X86::PADDBrm:
+  case X86::VPADDBrm:
+  case X86::VPADDBYrm:
+  case X86::VPADDBZ128rm:
+  case X86::VPADDBZ256rm:
+  case X86::VPADDBZrm:
+    printConstantPoolOp(*OutStreamer, MI, " + ", 8, !EnablePrintSchedInfo);
+    break;
+  case X86::PSUBBrm:
+  case X86::VPSUBBrm:
+  case X86::VPSUBBYrm:
+  case X86::VPSUBBZ128rm:
+  case X86::VPSUBBZ256rm:
+  case X86::VPSUBBZrm:
+    printConstantPoolOp(*OutStreamer, MI, " - ", 8, !EnablePrintSchedInfo);
+    break;
+
+  case X86::PADDWrm:
+  case X86::VPADDWrm:
+  case X86::VPADDWYrm:
+  case X86::VPADDWZ128rm:
+  case X86::VPADDWZ256rm:
+  case X86::VPADDWZrm:
+    printConstantPoolOp(*OutStreamer, MI, " + ", 16, !EnablePrintSchedInfo);
+    break;
+  case X86::PSUBWrm:
+  case X86::VPSUBWrm:
+  case X86::VPSUBWYrm:
+  case X86::VPSUBWZ128rm:
+  case X86::VPSUBWZ256rm:
+  case X86::VPSUBWZrm:
+    printConstantPoolOp(*OutStreamer, MI, " - ", 16, !EnablePrintSchedInfo);
+    break;
+
+  case X86::PADDDrm:
+  case X86::VPADDDrm:
+  case X86::VPADDDYrm:
+  case X86::VPADDDZ128rm:
+  case X86::VPADDDZ256rm:
+  case X86::VPADDDZrm:
+    printConstantPoolOp(*OutStreamer, MI, " + ", 32, !EnablePrintSchedInfo);
+    break;
+  case X86::PSUBDrm:
+  case X86::VPSUBDrm:
+  case X86::VPSUBDYrm:
+  case X86::VPSUBDZ128rm:
+  case X86::VPSUBDZ256rm:
+  case X86::VPSUBDZrm:
+    printConstantPoolOp(*OutStreamer, MI, " - ", 32, !EnablePrintSchedInfo);
+    break;
+
+  case X86::PADDQrm:
+  case X86::VPADDQrm:
+  case X86::VPADDQYrm:
+  case X86::VPADDQZ128rm:
+  case X86::VPADDQZ256rm:
+  case X86::VPADDQZrm:
+    printConstantPoolOp(*OutStreamer, MI, " + ", 64, !EnablePrintSchedInfo);
+    break;
+  case X86::PSUBQrm:
+  case X86::VPSUBQrm:
+  case X86::VPSUBQYrm:
+  case X86::VPSUBQZ128rm:
+  case X86::VPSUBQZ256rm:
+  case X86::VPSUBQZrm:
+    printConstantPoolOp(*OutStreamer, MI, " - ", 64, !EnablePrintSchedInfo);
+    break;
+
+  case X86::ANDPDrm:
+  case X86::VANDPDrm:
+  case X86::VANDPDYrm:
+  case X86::VANDPDZ128rm:
+  case X86::VANDPDZ256rm:
+  case X86::VANDPDZrm:
+  case X86::ANDPSrm:
+  case X86::VANDPSrm:
+  case X86::VANDPSYrm:
+  case X86::VANDPSZ128rm:
+  case X86::VANDPSZ256rm:
+  case X86::VANDPSZrm:
+  case X86::PANDrm:
+  case X86::VPANDrm:
+  case X86::VPANDYrm:
+  case X86::VPANDDZ128rm:
+  case X86::VPANDDZ256rm:
+  case X86::VPANDDZrm:
+  case X86::VPANDQZ128rm:
+  case X86::VPANDQZ256rm:
+  case X86::VPANDQZrm:
+    printConstantPoolOp(*OutStreamer, MI, " & ", 64, !EnablePrintSchedInfo);
+    break;
+  case X86::ORPDrm:
+  case X86::VORPDrm:
+  case X86::VORPDYrm:
+  case X86::VORPDZ128rm:
+  case X86::VORPDZ256rm:
+  case X86::VORPDZrm:
+  case X86::ORPSrm:
+  case X86::VORPSrm:
+  case X86::VORPSYrm:
+  case X86::VORPSZ128rm:
+  case X86::VORPSZ256rm:
+  case X86::VORPSZrm:
+  case X86::PORrm:
+  case X86::VPORrm:
+  case X86::VPORYrm:
+  case X86::VPORDZ128rm:
+  case X86::VPORDZ256rm:
+  case X86::VPORDZrm:
+  case X86::VPORQZ128rm:
+  case X86::VPORQZ256rm:
+  case X86::VPORQZrm:
+    printConstantPoolOp(*OutStreamer, MI, " | ", 64, !EnablePrintSchedInfo);
+    break;
+  case X86::XORPDrm:
+  case X86::VXORPDrm:
+  case X86::VXORPDYrm:
+  case X86::VXORPDZ128rm:
+  case X86::VXORPDZ256rm:
+  case X86::VXORPDZrm:
+  case X86::XORPSrm:
+  case X86::VXORPSrm:
+  case X86::VXORPSYrm:
+  case X86::VXORPSZ128rm:
+  case X86::VXORPSZ256rm:
+  case X86::VXORPSZrm:
+  case X86::PXORrm:
+  case X86::VPXORrm:
+  case X86::VPXORYrm:
+  case X86::VPXORDZ128rm:
+  case X86::VPXORDZ256rm:
+  case X86::VPXORDZrm:
+  case X86::VPXORQZ128rm:
+  case X86::VPXORQZ256rm:
+  case X86::VPXORQZrm:
+    printConstantPoolOp(*OutStreamer, MI, " ^ ", 64, !EnablePrintSchedInfo);
+    break;
+
 #define MOV_CASE(Prefix, Suffix) \
   case X86::Prefix##MOVAPD##Suffix##rm: \
   case X86::Prefix##MOVAPS##Suffix##rm: \
Index: lib/Target/X86/X86ShuffleDecodeConstantPool.h
===================================================================
--- lib/Target/X86/X86ShuffleDecodeConstantPool.h
+++ lib/Target/X86/X86ShuffleDecodeConstantPool.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_LIB_TARGET_X86_X86SHUFFLEDECODECONSTANTPOOL_H
 #define LLVM_LIB_TARGET_X86_X86SHUFFLEDECODECONSTANTPOOL_H
 
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallVector.h"
 
 //===----------------------------------------------------------------------===//
@@ -25,6 +26,11 @@
 class Constant;
 class MVT;
 
+// Extract a constant vector into an array of uint64_ts.
+bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
+                         APInt &UndefElts,
+                         SmallVectorImpl<uint64_t> &RawMask);
+
 /// Decode a PSHUFB mask from an IR-level vector constant.
 void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
Index: lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
===================================================================
--- lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -22,9 +22,9 @@
 
 namespace llvm {
 
-static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
-                                APInt &UndefElts,
-                                SmallVectorImpl<uint64_t> &RawMask) {
+bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
+                         APInt &UndefElts,
+                         SmallVectorImpl<uint64_t> &RawMask) {
   // It is not an error for shuffle masks to not be a vector of
   // MaskEltSizeInBits because the constant pool uniques constants by their
   // bit representation.
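
Editor's note: the comment text built by printConstantPoolOp above follows the pattern "dst = src <op> [elt0,elt1,...]", printing each element as a fixed-width hex value, printing undef elements as "u", and collapsing identical elements to "[value splat]", as the updated test expectations below show. The following is a minimal standalone C++ sketch of just that formatting logic, independent of the LLVM MC/streamer APIs; the helper name formatConstantComment and the sample inputs are illustrative only, not part of the patch.

    #include <cstdint>
    #include <iomanip>
    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    // Build a comment body like "[0x0004 splat]" or "[0x01,0x02,u,0x04]".
    // EltSize is the element width in bits; Undef marks elements printed as 'u'.
    static std::string formatConstantComment(const std::vector<uint64_t> &Vec,
                                             const std::vector<bool> &Undef,
                                             unsigned EltSize) {
      // Splat detection: all elements equal to the first one.
      bool IsSplat = true;
      for (size_t i = 1; i < Vec.size(); ++i) {
        if (Vec[i] != Vec[0]) {
          IsSplat = false;
          break;
        }
      }

      std::ostringstream OS;
      OS << "[";
      for (size_t i = 0; i < Vec.size(); ++i) {
        if (i != 0)
          OS << ",";
        if (Undef[i])
          OS << 'u';
        else
          OS << "0x" << std::hex << std::setw(EltSize / 4) << std::setfill('0')
             << Vec[i] << std::dec;
        if (IsSplat) {
          OS << " splat";
          break;
        }
      }
      OS << "]";
      return OS.str();
    }

    int main() {
      // 8 x i16 splat of 4 prints "[0x0004 splat]", matching the vpaddw
      // comments in the updated tests below.
      std::cout << formatConstantComment({4, 4, 4, 4, 4, 4, 4, 4},
                                         std::vector<bool>(8, false), 16)
                << "\n";
    }
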
Index: test/CodeGen/X86/avx512-any_extend_load.ll =================================================================== --- test/CodeGen/X86/avx512-any_extend_load.ll +++ test/CodeGen/X86/avx512-any_extend_load.ll @@ -31,7 +31,7 @@ ; KNL-LABEL: any_extend_load_v8i32: ; KNL: # %bb.0: ; KNL-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero -; KNL-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0 +; KNL-NEXT: vpaddw {{.*#+}} xmm0 = xmm0 + [0x0004 splat] ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] ; KNL-NEXT: vmovq %xmm0, (%rdi) ; KNL-NEXT: retq @@ -56,7 +56,7 @@ ; KNL-LABEL: any_extend_load_v8i16: ; KNL: # %bb.0: ; KNL-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero -; KNL-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0 +; KNL-NEXT: vpaddb {{.*#+}} xmm0 = xmm0 + [0x04,0x00,0x04,0x00,0x04,0x00,0x04,0x00,0x04,0x00,0x04,0x00,0x04,0x00,0x04,0x00] ; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; KNL-NEXT: vmovq %xmm0, (%rdi) ; KNL-NEXT: retq @@ -64,7 +64,7 @@ ; SKX-LABEL: any_extend_load_v8i16: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero -; SKX-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0 +; SKX-NEXT: vpaddw {{.*#+}} xmm0 = xmm0 + [0x0004 splat] ; SKX-NEXT: vpmovwb %xmm0, (%rdi) ; SKX-NEXT: retq %wide.load = load <8 x i8>, <8 x i8>* %ptr, align 1 Index: test/CodeGen/X86/avx512-bugfix-25270.ll =================================================================== --- test/CodeGen/X86/avx512-bugfix-25270.ll +++ test/CodeGen/X86/avx512-bugfix-25270.ll @@ -11,12 +11,12 @@ ; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: vmovups (%rdi), %zmm0 ; CHECK-NEXT: vmovups %zmm0, (%rsp) ## 64-byte Spill -; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %zmm1 +; CHECK-NEXT: vbroadcastss {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] ; CHECK-NEXT: vmovaps %zmm1, (%rdi) ; CHECK-NEXT: callq _Print__512 ; CHECK-NEXT: vmovups (%rsp), %zmm0 ## 64-byte Reload ; CHECK-NEXT: callq _Print__512 -; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %zmm0 +; CHECK-NEXT: vbroadcastss {{.*#+}} zmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] ; CHECK-NEXT: vmovaps %zmm0, (%rbx) ; CHECK-NEXT: addq $112, %rsp ; CHECK-NEXT: popq %rbx Index: test/CodeGen/X86/avx512-calling-conv.ll =================================================================== --- test/CodeGen/X86/avx512-calling-conv.ll +++ test/CodeGen/X86/avx512-calling-conv.ll @@ -197,7 +197,7 @@ ; KNL-NEXT: vpmovdw %zmm0, %ymm0 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0 ; KNL-NEXT: callq _func8xi1 -; KNL-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; KNL-NEXT: vandps {{.*#+}} xmm0 = xmm0 & [0x0000000100000001 splat] ; KNL-NEXT: popq %rax ; KNL-NEXT: retq ; @@ -209,7 +209,7 @@ ; SKX-NEXT: vpmovm2w %k0, %xmm0 ; SKX-NEXT: vzeroupper ; SKX-NEXT: callq _func8xi1 -; SKX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; SKX-NEXT: vpand {{.*#+}} xmm0 = xmm0 & [0x0000000100000001 splat] ; SKX-NEXT: popq %rax ; SKX-NEXT: retq ; @@ -221,7 +221,7 @@ ; KNL_X32-NEXT: vpmovdw %zmm0, %ymm0 ; KNL_X32-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0 ; KNL_X32-NEXT: calll _func8xi1 -; KNL_X32-NEXT: vandps LCPI7_0, %xmm0, %xmm0 +; KNL_X32-NEXT: vandps {{.*#+}} xmm0 = xmm0 & [0x0000000100000001 splat] ; KNL_X32-NEXT: addl $12, %esp ; KNL_X32-NEXT: retl %cmpRes = icmp sgt <8 x i32>%a, %b Index: test/CodeGen/X86/avx512-cvt.ll 
=================================================================== --- test/CodeGen/X86/avx512-cvt.ll +++ test/CodeGen/X86/avx512-cvt.ll @@ -1782,7 +1782,7 @@ define <8 x double> @ucto8f64(<8 x i8> %a) { ; ALL-LABEL: ucto8f64: ; ALL: # %bb.0: -; ALL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; ALL-NEXT: vpand {{.*#+}} xmm0 = xmm0 & [0x00ff00ff00ff00ff splat] ; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0 ; ALL-NEXT: retq Index: test/CodeGen/X86/avx512-ext.ll =================================================================== --- test/CodeGen/X86/avx512-ext.ll +++ test/CodeGen/X86/avx512-ext.ll @@ -1720,7 +1720,7 @@ ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1 ; KNL-NEXT: vpmovdb %zmm1, %xmm1 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; KNL-NEXT: vpand {{.*#+}} ymm0 = ymm0 & [0x0101010101010101 splat] ; KNL-NEXT: retq ; ; SKX-LABEL: zext_32xi1_to_32xi8: Index: test/CodeGen/X86/avx512-fma-commute.ll =================================================================== --- test/CodeGen/X86/avx512-fma-commute.ll +++ test/CodeGen/X86/avx512-fma-commute.ll @@ -9,7 +9,7 @@ define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss_load0(<4 x float>* %x0ptr, <4 x float> %x1, <4 x float> %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_load0: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1 +; CHECK-NEXT: vfmadd231ss {{.*#+}} xmm1 = (xmm0 * mem) + xmm1 ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %x0 = load <4 x float>, <4 x float>* %x0ptr @@ -20,7 +20,7 @@ define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss_load1(<4 x float> %x0, <4 x float>* %x1ptr, <4 x float> %x2){ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_load1: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1 +; CHECK-NEXT: vfmadd231ss {{.*#+}} xmm1 = (xmm0 * mem) + xmm1 ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %x1 = load <4 x float>, <4 x float>* %x1ptr @@ -31,7 +31,7 @@ define <2 x double> @test_int_x86_avx512_mask3_vfmadd_sd_load0(<2 x double>* %x0ptr, <2 x double> %x1, <2 x double> %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_sd_load0: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfmadd231sd (%rdi), %xmm0, %xmm1 +; CHECK-NEXT: vfmadd231sd {{.*#+}} xmm1 = (xmm0 * mem) + xmm1 ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: retq %x0 = load <2 x double>, <2 x double>* %x0ptr @@ -42,7 +42,7 @@ define <2 x double> @test_int_x86_avx512_mask3_vfmadd_sd_load1(<2 x double> %x0, <2 x double>* %x1ptr, <2 x double> %x2){ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_sd_load1: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfmadd231sd (%rdi), %xmm0, %xmm1 +; CHECK-NEXT: vfmadd231sd {{.*#+}} xmm1 = (xmm0 * mem) + xmm1 ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: retq %x1 = load <2 x double>, <2 x double>* %x1ptr @@ -53,7 +53,7 @@ define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ss_load0(<4 x float>* %x0ptr, <4 x float> %x1, <4 x float> %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ss_load0: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfmsub231ss (%rdi), %xmm0, %xmm1 +; CHECK-NEXT: vfmsub231ss {{.*#+}} xmm1 = (xmm0 * mem) - xmm1 ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %x0 = load <4 x float>, <4 x float>* %x0ptr @@ -64,7 +64,7 @@ define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ss_load1(<4 x float> %x0, <4 x float>* %x1ptr, <4 x float> %x2){ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ss_load1: ; 
CHECK: ## %bb.0: -; CHECK-NEXT: vfmsub231ss (%rdi), %xmm0, %xmm1 +; CHECK-NEXT: vfmsub231ss {{.*#+}} xmm1 = (xmm0 * mem) - xmm1 ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %x1 = load <4 x float>, <4 x float>* %x1ptr @@ -75,7 +75,7 @@ define <2 x double> @test_int_x86_avx512_mask3_vfmsub_sd_load0(<2 x double>* %x0ptr, <2 x double> %x1, <2 x double> %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_sd_load0: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfmsub231sd (%rdi), %xmm0, %xmm1 +; CHECK-NEXT: vfmsub231sd {{.*#+}} xmm1 = (xmm0 * mem) - xmm1 ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: retq %x0 = load <2 x double>, <2 x double>* %x0ptr @@ -86,7 +86,7 @@ define <2 x double> @test_int_x86_avx512_mask3_vfmsub_sd_load1(<2 x double> %x0, <2 x double>* %x1ptr, <2 x double> %x2){ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_sd_load1: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfmsub231sd (%rdi), %xmm0, %xmm1 +; CHECK-NEXT: vfmsub231sd {{.*#+}} xmm1 = (xmm0 * mem) - xmm1 ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: retq %x1 = load <2 x double>, <2 x double>* %x1ptr Index: test/CodeGen/X86/avx512-fma-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512-fma-intrinsics.ll +++ test/CodeGen/X86/avx512-fma-intrinsics.ll @@ -7,7 +7,7 @@ define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; CHECK-LABEL: test_x86_vfnmadd_ps_z: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2 ; CHECK-NEXT: retq %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind ret <16 x float> %res @@ -18,7 +18,7 @@ ; CHECK-LABEL: test_mask_vfnmadd_ps: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} +; CHECK-NEXT: vfnmadd132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2 ; CHECK-NEXT: retq %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind ret <16 x float> %res @@ -27,7 +27,7 @@ define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_x86_vfnmadd_pd_z: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2 ; CHECK-NEXT: retq %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind ret <8 x double> %res @@ -38,7 +38,7 @@ ; CHECK-LABEL: test_mask_vfnmadd_pd: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} +; CHECK-NEXT: vfnmadd132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2 ; CHECK-NEXT: retq %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind ret <8 x double> %res @@ -47,7 +47,7 @@ define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; CHECK-LABEL: test_x86_vfnmsubps_z: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2 ; CHECK-NEXT: retq %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind ret <16 x float> %res @@ -58,7 +58,7 @@ ; CHECK-LABEL: 
test_mask_vfnmsub_ps: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} +; CHECK-NEXT: vfnmsub132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2 ; CHECK-NEXT: retq %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind ret <16 x float> %res @@ -67,7 +67,7 @@ define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_x86_vfnmsubpd_z: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2 ; CHECK-NEXT: retq %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind ret <8 x double> %res @@ -78,7 +78,7 @@ ; CHECK-LABEL: test_mask_vfnmsub_pd: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} +; CHECK-NEXT: vfnmsub132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2 ; CHECK-NEXT: retq %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind ret <8 x double> %res @@ -87,7 +87,7 @@ define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; CHECK-LABEL: test_x86_vfmaddsubps_z: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vfmaddsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2 ; CHECK-NEXT: retq %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind ret <16 x float> %res @@ -97,7 +97,7 @@ ; CHECK-LABEL: test_mask_fmaddsub_ps: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vfmaddsub132ps %zmm1, %zmm2, %zmm0 {%k1} +; CHECK-NEXT: vfmaddsub132ps {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2 ; CHECK-NEXT: retq %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4) ret <16 x float> %res @@ -108,7 +108,7 @@ define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_x86_vfmaddsubpd_z: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vfmaddsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2 ; CHECK-NEXT: retq %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind ret <8 x double> %res @@ -119,7 +119,7 @@ ; CHECK-LABEL: test_mask_vfmaddsub_pd: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} +; CHECK-NEXT: vfmaddsub132pd {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2 ; CHECK-NEXT: retq %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind ret <8 x double> %res @@ -130,7 +130,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %zmm0, %zmm3 -; CHECK-NEXT: vfmaddsub132pd %zmm1, %zmm2, %zmm3 {%k1} +; CHECK-NEXT: vfmaddsub132pd {{.*#+}} zmm3 = (zmm3 * zmm1) +/- zmm2 ; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -147,7 +147,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %zmm2, %zmm3 -; CHECK-NEXT: vfmaddsub231pd %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: 
vfmaddsub231pd {{.*#+}} zmm3 = (zmm0 * zmm1) +/- zmm3 ; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -164,7 +164,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %zmm1, %zmm3 -; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm0, %zmm3 {%k1} {z} +; CHECK-NEXT: vfmaddsub213pd {{.*#+}} zmm3 = (zmm0 * zmm3) +/- zmm2 ; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -179,7 +179,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 -; CHECK-NEXT: vfmaddsub132ps %zmm1, %zmm2, %zmm3 {%k1} +; CHECK-NEXT: vfmaddsub132ps {{.*#+}} zmm3 = (zmm3 * zmm1) +/- zmm2 ; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -196,7 +196,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 -; CHECK-NEXT: vfmaddsub231ps %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfmaddsub231ps {{.*#+}} zmm3 = (zmm0 * zmm1) +/- zmm3 ; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -213,7 +213,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm1, %zmm3 -; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm0, %zmm3 {%k1} {z} +; CHECK-NEXT: vfmaddsub213ps {{.*#+}} zmm3 = (zmm0 * zmm3) +/- zmm2 ; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -230,7 +230,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %zmm2, %zmm3 -; CHECK-NEXT: vfmsubadd231pd %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfmsubadd231pd {{.*#+}} zmm3 = (zmm0 * zmm1) -/+ zmm3 ; CHECK-NEXT: vfmsubadd213pd {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -247,7 +247,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 -; CHECK-NEXT: vfmsubadd231ps %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfmsubadd231ps {{.*#+}} zmm3 = (zmm0 * zmm1) -/+ zmm3 ; CHECK-NEXT: vfmsubadd213ps {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -301,7 +301,7 @@ ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vfmadd132ps %zmm1, %zmm2, %zmm0 {%k1} +; CHECK-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm2 ; CHECK-NEXT: retq %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind ret <16 x float> %res @@ -346,7 +346,7 @@ define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2 ; CHECK-NEXT: retq %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind ret <16 x float> %res @@ -359,7 +359,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %zmm2, %zmm3 -; CHECK-NEXT: vfmsub231pd %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfmsub231pd {{.*#+}} zmm3 = (zmm0 * zmm1) - zmm3 ; CHECK-NEXT: vfmsub213pd {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -376,7 
+376,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 -; CHECK-NEXT: vfmsub231ps %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfmsub231ps {{.*#+}} zmm3 = (zmm0 * zmm1) - zmm3 ; CHECK-NEXT: vfmsub213ps {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -430,7 +430,7 @@ ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1} +; CHECK-NEXT: vfmadd132pd {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm2 ; CHECK-NEXT: retq %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind ret <8 x double> %res @@ -475,7 +475,7 @@ define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2 ; CHECK-NEXT: retq %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind ret <8 x double> %res @@ -486,7 +486,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %zmm0, %zmm3 -; CHECK-NEXT: vfmadd132pd %zmm1, %zmm2, %zmm3 {%k1} +; CHECK-NEXT: vfmadd132pd {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm2 ; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -503,7 +503,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %zmm2, %zmm3 -; CHECK-NEXT: vfmadd231pd %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfmadd231pd {{.*#+}} zmm3 = (zmm0 * zmm1) + zmm3 ; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -520,7 +520,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %zmm1, %zmm3 -; CHECK-NEXT: vfmadd213pd %zmm2, %zmm0, %zmm3 {%k1} {z} +; CHECK-NEXT: vfmadd213pd {{.*#+}} zmm3 = (zmm0 * zmm3) + zmm2 ; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -535,7 +535,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 -; CHECK-NEXT: vfmadd132ps %zmm1, %zmm2, %zmm3 {%k1} +; CHECK-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm2 ; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -552,7 +552,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 -; CHECK-NEXT: vfmadd231ps %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfmadd231ps {{.*#+}} zmm3 = (zmm0 * zmm1) + zmm3 ; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -569,7 +569,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm1, %zmm3 -; CHECK-NEXT: vfmadd213ps %zmm2, %zmm0, %zmm3 {%k1} {z} +; CHECK-NEXT: vfmadd213ps {{.*#+}} zmm3 = (zmm0 * zmm3) + zmm2 ; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -624,7 +624,7 @@ ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} +; CHECK-NEXT: vfnmsub132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2 ; CHECK-NEXT: retq 
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind ret <8 x double> %res @@ -669,7 +669,7 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2 ; CHECK-NEXT: retq %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind ret <8 x double> %res @@ -680,7 +680,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %zmm0, %zmm3 -; CHECK-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm3 {%k1} +; CHECK-NEXT: vfnmsub132pd {{.*#+}} zmm3 = -(zmm3 * zmm1) - zmm2 ; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -697,7 +697,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %zmm2, %zmm3 -; CHECK-NEXT: vfnmsub231pd %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfnmsub231pd {{.*#+}} zmm3 = -(zmm0 * zmm1) - zmm3 ; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -712,7 +712,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 -; CHECK-NEXT: vfnmsub132ps %zmm1, %zmm2, %zmm3 {%k1} +; CHECK-NEXT: vfnmsub132ps {{.*#+}} zmm3 = -(zmm3 * zmm1) - zmm2 ; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -729,7 +729,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm2, %zmm3 -; CHECK-NEXT: vfnmsub231ps %zmm1, %zmm0, %zmm3 {%k1} +; CHECK-NEXT: vfnmsub231ps {{.*#+}} zmm3 = -(zmm0 * zmm1) - zmm3 ; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -744,7 +744,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovapd %zmm0, %zmm3 -; CHECK-NEXT: vfnmadd132pd %zmm1, %zmm2, %zmm3 {%k1} +; CHECK-NEXT: vfnmadd132pd {{.*#+}} zmm3 = -(zmm3 * zmm1) + zmm2 ; CHECK-NEXT: vfnmadd213pd {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -759,7 +759,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vmovaps %zmm0, %zmm3 -; CHECK-NEXT: vfnmadd132ps %zmm1, %zmm2, %zmm3 {%k1} +; CHECK-NEXT: vfnmadd132ps {{.*#+}} zmm3 = -(zmm3 * zmm1) + zmm2 ; CHECK-NEXT: vfnmadd213ps {rn-sae}, %zmm2, %zmm0, %zmm1 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0 ; CHECK-NEXT: retq Index: test/CodeGen/X86/avx512-fma.ll =================================================================== --- test/CodeGen/X86/avx512-fma.ll +++ test/CodeGen/X86/avx512-fma.ll @@ -5,7 +5,7 @@ define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; ALL-LABEL: test_x86_fmadd_ps_z: ; ALL: ## %bb.0: -; ALL-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0 +; ALL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2 ; ALL-NEXT: retq %x = fmul <16 x float> %a0, %a1 %res = fadd <16 x float> %x, %a2 @@ -15,7 +15,7 @@ define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; ALL-LABEL: test_x86_fmsub_ps_z: ; ALL: ## %bb.0: -; ALL-NEXT: vfmsub213ps %zmm2, %zmm1, %zmm0 +; ALL-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2 ; ALL-NEXT: retq %x = fmul <16 
x float> %a0, %a1 %res = fsub <16 x float> %x, %a2 @@ -25,7 +25,7 @@ define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; ALL-LABEL: test_x86_fnmadd_ps_z: ; ALL: ## %bb.0: -; ALL-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm0 +; ALL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2 ; ALL-NEXT: retq %x = fmul <16 x float> %a0, %a1 %res = fsub <16 x float> %a2, %x @@ -35,7 +35,7 @@ define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; ALL-LABEL: test_x86_fnmsub_ps_z: ; ALL: ## %bb.0: -; ALL-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0 +; ALL-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2 ; ALL-NEXT: retq %x = fmul <16 x float> %a0, %a1 %y = fsub <16 x float> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; ALL-LABEL: test_x86_fmadd_pd_z: ; ALL: ## %bb.0: -; ALL-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0 +; ALL-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2 ; ALL-NEXT: retq %x = fmul <8 x double> %a0, %a1 %res = fadd <8 x double> %x, %a2 @@ -59,7 +59,7 @@ define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; ALL-LABEL: test_x86_fmsub_pd_z: ; ALL: ## %bb.0: -; ALL-NEXT: vfmsub213pd %zmm2, %zmm1, %zmm0 +; ALL-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2 ; ALL-NEXT: retq %x = fmul <8 x double> %a0, %a1 %res = fsub <8 x double> %x, %a2 @@ -69,7 +69,7 @@ define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) { ; ALL-LABEL: test_x86_fmsub_213: ; ALL: ## %bb.0: -; ALL-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 +; ALL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 ; ALL-NEXT: retq %x = fmul double %a0, %a1 %res = fsub double %x, %a2 @@ -79,7 +79,7 @@ define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) { ; ALL-LABEL: test_x86_fmsub_213_m: ; ALL: ## %bb.0: -; ALL-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 +; ALL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem ; ALL-NEXT: retq %a2 = load double , double *%a2_ptr %x = fmul double %a0, %a1 @@ -90,7 +90,7 @@ define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) { ; ALL-LABEL: test_x86_fmsub_231_m: ; ALL: ## %bb.0: -; ALL-NEXT: vfmsub132sd (%rdi), %xmm1, %xmm0 +; ALL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 ; ALL-NEXT: retq %a2 = load double , double *%a2_ptr %x = fmul double %a0, %a2 @@ -101,7 +101,7 @@ define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind { ; ALL-LABEL: test231_br: ; ALL: ## %bb.0: -; ALL-NEXT: vfmadd132ps {{.*}}(%rip){1to16}, %zmm1, %zmm0 +; ALL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1 ; ALL-NEXT: retq %b1 = fmul <16 x float> %a1, %b2 = fadd <16 x float> %b1, %a2 @@ -111,7 +111,7 @@ define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind { ; ALL-LABEL: test213_br: ; ALL: ## %bb.0: -; ALL-NEXT: vfmadd213ps {{.*}}(%rip){1to16}, %zmm1, %zmm0 +; ALL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + mem ; ALL-NEXT: retq %b1 = fmul <16 x float> %a1, %a2 %b2 = fadd <16 x float> %b1, @@ -125,14 +125,14 @@ ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1 -; KNL-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1} +; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1 ; KNL-NEXT: retq ; ; SKX-LABEL: test_x86_fmadd132_ps: ; SKX: ## %bb.0: ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 ; SKX-NEXT: vpmovb2m %xmm2, %k1 -; SKX-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1} +; SKX-NEXT: 
vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1 ; SKX-NEXT: retq %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 %x = fmul <16 x float> %a0, %a2 @@ -148,7 +148,7 @@ ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1 -; KNL-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1} +; KNL-NEXT: vfmadd231ps {{.*#+}} zmm1 = (zmm0 * mem) + zmm1 ; KNL-NEXT: vmovaps %zmm1, %zmm0 ; KNL-NEXT: retq ; @@ -156,7 +156,7 @@ ; SKX: ## %bb.0: ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 ; SKX-NEXT: vpmovb2m %xmm2, %k1 -; SKX-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1} +; SKX-NEXT: vfmadd231ps {{.*#+}} zmm1 = (zmm0 * mem) + zmm1 ; SKX-NEXT: vmovaps %zmm1, %zmm0 ; SKX-NEXT: retq %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 @@ -173,7 +173,7 @@ ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1 -; KNL-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1} +; KNL-NEXT: vfmadd213ps {{.*#+}} zmm1 = (zmm0 * zmm1) + mem ; KNL-NEXT: vmovaps %zmm1, %zmm0 ; KNL-NEXT: retq ; @@ -181,7 +181,7 @@ ; SKX: ## %bb.0: ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 ; SKX-NEXT: vpmovb2m %xmm2, %k1 -; SKX-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1} +; SKX-NEXT: vfmadd213ps {{.*#+}} zmm1 = (zmm0 * zmm1) + mem ; SKX-NEXT: vmovaps %zmm1, %zmm0 ; SKX-NEXT: retq %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 Index: test/CodeGen/X86/avx512-gather-scatter-intrin.ll =================================================================== --- test/CodeGen/X86/avx512-gather-scatter-intrin.ll +++ test/CodeGen/X86/avx512-gather-scatter-intrin.ll @@ -17,7 +17,7 @@ ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2} -; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: vpaddd {{.*#+}} zmm0 = zmm0 + [0x00000000,0x00000001,0x00000002,0x00000003,0x00000000,0x00000001,0x00000002,0x00000003,0x00000000,0x00000001,0x00000002,0x00000003,0x00000000,0x00000001,0x00000002,0x00000003] ; CHECK-NEXT: vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -33,7 +33,7 @@ ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2} -; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 +; CHECK-NEXT: vpaddd {{.*#+}} ymm0 = ymm0 + [0x00000000,0x00000001,0x00000002,0x00000003,0x00000000,0x00000001,0x00000002,0x00000003] ; CHECK-NEXT: vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -49,7 +49,7 @@ ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2} -; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: vpaddq {{.*#+}} zmm0 = zmm0 + [0x0000000000000000,0x0000000000000001,0x0000000000000002,0x0000000000000003,0x0000000000000000,0x0000000000000001,0x0000000000000002,0x0000000000000003] ; CHECK-NEXT: vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -65,7 +65,7 @@ ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2} -; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: vpaddq {{.*#+}} zmm0 = zmm0 + [0x0000000000000000,0x0000000000000001,0x0000000000000002,0x0000000000000003,0x0000000000000000,0x0000000000000001,0x0000000000000002,0x0000000000000003] ; CHECK-NEXT: vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -93,7 +93,7 @@ ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: kmovq 
%k1, %k2 ; CHECK-NEXT: vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2} -; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: vpaddd {{.*#+}} zmm0 = zmm0 + [0x00000000,0x00000001,0x00000002,0x00000003,0x00000000,0x00000001,0x00000002,0x00000003,0x00000000,0x00000001,0x00000002,0x00000003,0x00000000,0x00000001,0x00000002,0x00000003] ; CHECK-NEXT: vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -109,7 +109,7 @@ ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2} -; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: vpaddq {{.*#+}} zmm0 = zmm0 + [0x0000000000000000,0x0000000000000001,0x0000000000000002,0x0000000000000003,0x0000000000000000,0x0000000000000001,0x0000000000000002,0x0000000000000003] ; CHECK-NEXT: vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -125,7 +125,7 @@ ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2} -; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: vpaddq {{.*#+}} zmm0 = zmm0 + [0x0000000000000000,0x0000000000000001,0x0000000000000002,0x0000000000000003,0x0000000000000000,0x0000000000000001,0x0000000000000002,0x0000000000000003] ; CHECK-NEXT: vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -141,7 +141,7 @@ ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2} -; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 +; CHECK-NEXT: vpaddd {{.*#+}} ymm0 = ymm0 + [0x00000000,0x00000001,0x00000002,0x00000003,0x00000000,0x00000001,0x00000002,0x00000003] ; CHECK-NEXT: vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -258,7 +258,7 @@ ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: kxnorw %k0, %k0, %k2 ; CHECK-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2} -; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0 +; CHECK-NEXT: vpaddq {{.*#+}} zmm0 = zmm0 + [0x0000000000000000,0x0000000000000001,0x0000000000000002,0x0000000000000003,0x0000000000000000,0x0000000000000001,0x0000000000000002,0x0000000000000003] ; CHECK-NEXT: vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq Index: test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512-mask-op.ll +++ test/CodeGen/X86/avx512-mask-op.ll @@ -1857,8 +1857,8 @@ define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) { ; KNL-LABEL: test_build_vec_v32i1: ; KNL: ## %bb.0: -; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 -; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 +; KNL-NEXT: vandps {{.*#+}} ymm0 = ymm0 & [0x0000ffff0000ffff,0xffff00000000ffff,0x0000ffff00000000,0x0000ffff0000ffff] +; KNL-NEXT: vandps {{.*#+}} ymm1 = ymm1 & [0x0000ffff0000ffff,0x0000ffff00000000,0xffff00000000ffff,0x0000ffff0000ffff] ; KNL-NEXT: retq ; ; SKX-LABEL: test_build_vec_v32i1: @@ -1877,8 +1877,8 @@ ; ; AVX512DQ-LABEL: test_build_vec_v32i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 -; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 +; AVX512DQ-NEXT: vandps {{.*#+}} ymm0 = ymm0 & [0x0000ffff0000ffff,0xffff00000000ffff,0x0000ffff00000000,0x0000ffff0000ffff] +; AVX512DQ-NEXT: vandps {{.*#+}} ymm1 = ymm1 & [0x0000ffff0000ffff,0x0000ffff00000000,0xffff00000000ffff,0x0000ffff0000ffff] ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test_build_vec_v32i1: @@ -1894,8 +1894,8 @@ define <64 x i8> 
@test_build_vec_v64i1(<64 x i8> %x) { ; KNL-LABEL: test_build_vec_v64i1: ; KNL: ## %bb.0: -; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 -; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 +; KNL-NEXT: vandps {{.*#+}} ymm0 = ymm0 & [0x00ff000000ff0000,0xff0000ff00ff00ff,0x00ff00ff00ff0000,0x00ff0000ff0000ff] +; KNL-NEXT: vandps {{.*#+}} ymm1 = ymm1 & [0x00ff000000ff00ff,0x00ff00ffff0000ff,0x00ff000000ff00ff,0x00ff00ffff0000ff] ; KNL-NEXT: retq ; ; SKX-LABEL: test_build_vec_v64i1: @@ -1910,8 +1910,8 @@ ; ; AVX512DQ-LABEL: test_build_vec_v64i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 -; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 +; AVX512DQ-NEXT: vandps {{.*#+}} ymm0 = ymm0 & [0x00ff000000ff0000,0xff0000ff00ff00ff,0x00ff00ff00ff0000,0x00ff0000ff0000ff] +; AVX512DQ-NEXT: vandps {{.*#+}} ymm1 = ymm1 & [0x00ff000000ff00ff,0x00ff00ffff0000ff,0x00ff000000ff00ff,0x00ff00ffff0000ff] ; AVX512DQ-NEXT: retq ; ; X86-LABEL: test_build_vec_v64i1: Index: test/CodeGen/X86/avx512-memfold.ll =================================================================== --- test/CodeGen/X86/avx512-memfold.ll +++ test/CodeGen/X86/avx512-memfold.ll @@ -58,7 +58,7 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_sd: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem ; CHECK-NEXT: retq %c.val = load double, double* %c %cv0 = insertelement <2 x double> undef, double %c.val, i32 0 Index: test/CodeGen/X86/avx512-nontemporal.ll =================================================================== --- test/CodeGen/X86/avx512-nontemporal.ll +++ test/CodeGen/X86/avx512-nontemporal.ll @@ -1,33 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s define i32 @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x double> %CC, <8 x i64> %E, <8 x i64> %EE, <16 x i32> %F, <16 x i32> %FF, <32 x i16> %G, <32 x i16> %GG, <64 x i8> %H, <64 x i8> %HH, i32 * %loadptr) { -; CHECK: vmovntps %z +; CHECK-LABEL: f: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: andq $-64, %rsp +; CHECK-NEXT: subq $64, %rsp +; CHECK-NEXT: vmovdqa64 144(%rbp), %zmm8 +; CHECK-NEXT: vmovdqa64 16(%rbp), %zmm9 +; CHECK-NEXT: movl (%rsi), %eax +; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vmovntps %zmm0, (%rdi) +; CHECK-NEXT: vpaddq %zmm5, %zmm4, %zmm0 +; CHECK-NEXT: addl (%rsi), %eax +; CHECK-NEXT: vmovntdq %zmm0, (%rdi) +; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm0 +; CHECK-NEXT: addl (%rsi), %eax +; CHECK-NEXT: vmovntpd %zmm0, (%rdi) +; CHECK-NEXT: vpaddd %zmm7, %zmm6, %zmm0 +; CHECK-NEXT: addl (%rsi), %eax +; CHECK-NEXT: vmovntdq %zmm0, (%rdi) +; CHECK-NEXT: vpaddw 80(%rbp), %zmm9, %zmm0 +; CHECK-NEXT: addl (%rsi), %eax +; CHECK-NEXT: vmovntdq %zmm0, (%rdi) +; CHECK-NEXT: vpaddb 208(%rbp), %zmm8, %zmm0 +; CHECK-NEXT: addl (%rsi), %eax +; CHECK-NEXT: vmovntdq %zmm0, (%rdi) +; CHECK-NEXT: addl (%rsi), %eax +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq %v0 = load i32, i32* %loadptr, align 1 %cast = bitcast i8* %B to <16 x float>* %A2 = fadd <16 x float> %A, %AA store <16 x float> %A2, <16 x float>* %cast, align 64, !nontemporal !0 %v1 = load i32, i32* %loadptr, align 1 -; CHECK: 
vmovntdq %z %cast1 = bitcast i8* %B to <8 x i64>* %E2 = add <8 x i64> %E, %EE store <8 x i64> %E2, <8 x i64>* %cast1, align 64, !nontemporal !0 %v2 = load i32, i32* %loadptr, align 1 -; CHECK: vmovntpd %z %cast2 = bitcast i8* %B to <8 x double>* %C2 = fadd <8 x double> %C, %CC store <8 x double> %C2, <8 x double>* %cast2, align 64, !nontemporal !0 %v3 = load i32, i32* %loadptr, align 1 -; CHECK: vmovntdq %z %cast3 = bitcast i8* %B to <16 x i32>* %F2 = add <16 x i32> %F, %FF store <16 x i32> %F2, <16 x i32>* %cast3, align 64, !nontemporal !0 %v4 = load i32, i32* %loadptr, align 1 -; CHECK: vmovntdq %z %cast4 = bitcast i8* %B to <32 x i16>* %G2 = add <32 x i16> %G, %GG store <32 x i16> %G2, <32 x i16>* %cast4, align 64, !nontemporal !0 %v5 = load i32, i32* %loadptr, align 1 -; CHECK: vmovntdq %z %cast5 = bitcast i8* %B to <64 x i8>* %H2 = add <64 x i8> %H, %HH store <64 x i8> %H2, <64 x i8>* %cast5, align 64, !nontemporal !0 Index: test/CodeGen/X86/avx512-scalar_mask.ll =================================================================== --- test/CodeGen/X86/avx512-scalar_mask.ll +++ test/CodeGen/X86/avx512-scalar_mask.ll @@ -8,7 +8,7 @@ ; CHECK-LABEL: test_var_mask: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 %mask, i32 4) ret < 4 x float> %res @@ -18,7 +18,7 @@ ; CHECK-LABEL: test_var_maskz: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} +; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 %mask, i32 4) ret < 4 x float> %res @@ -30,7 +30,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 0, i32 4) ret < 4 x float> %res @@ -42,7 +42,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} +; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 0, i32 4) ret < 4 x float> %res @@ -54,7 +54,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: movb $2, %al ; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 2, i32 4) ret < 4 x float> %res @@ -66,7 +66,7 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: movb $2, %al ; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} +; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 2, i32 4) ret < 4 x float> %res @@ -75,7 +75,7 @@ define <4 x float>@test_const_allone_mask(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) { ; CHECK-LABEL: test_const_allone_mask: ; 
CHECK: ## %bb.0: -; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 -1, i32 4) ret < 4 x float> %res @@ -84,7 +84,7 @@ define <4 x float>@test_const_allone_maskz(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) { ; CHECK-LABEL: test_const_allone_maskz: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 -1, i32 4) ret < 4 x float> %res @@ -93,7 +93,7 @@ define <4 x float>@test_const_3_mask(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) { ; CHECK-LABEL: test_const_3_mask: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 3, i32 4) ret < 4 x float> %res @@ -102,7 +102,7 @@ define <4 x float>@test_const_3_maskz(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) { ; CHECK-LABEL: test_const_3_maskz: ; CHECK: ## %bb.0: -; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 3, i32 4) ret < 4 x float> %res Index: test/CodeGen/X86/avx512-schedule.ll =================================================================== --- test/CodeGen/X86/avx512-schedule.ll +++ test/CodeGen/X86/avx512-schedule.ll @@ -2589,14 +2589,14 @@ define <8 x double> @ucto8f64(<8 x i8> %a) { ; GENERIC-LABEL: ucto8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: vpand {{.*#+}} xmm0 = xmm0 & [0x00ff00ff00ff00ff splat] sched: [7:0.50] ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ucto8f64: ; SKX: # %bb.0: -; SKX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpand {{.*#+}} xmm0 = xmm0 & [0x00ff00ff00ff00ff splat] sched: [7:0.50] ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] Index: test/CodeGen/X86/avx512-vbroadcasti128.ll =================================================================== --- test/CodeGen/X86/avx512-vbroadcasti128.ll +++ test/CodeGen/X86/avx512-vbroadcasti128.ll @@ -23,7 +23,7 @@ ; X64-AVX512-LABEL: test_broadcast_2i64_4i64: ; X64-AVX512: ## %bb.0: ; X64-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X64-AVX512-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpaddq {{.*#+}} ymm0 = ymm0 + [0x0000000000000001,0x0000000000000002,0x0000000000000003,0x0000000000000004] ; X64-AVX512-NEXT: retq %1 = load <2 x i64>, <2 x i64> *%p %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> @@ -47,7 +47,7 @@ ; X64-AVX512-LABEL: test_broadcast_4i32_8i32: ; X64-AVX512: ## %bb.0: ; X64-AVX512-NEXT: vbroadcasti128 {{.*#+}} 
ymm0 = mem[0,1,0,1] -; X64-AVX512-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpaddd {{.*#+}} ymm0 = ymm0 + [0x00000001,0x00000002,0x00000003,0x00000004,0x00000005,0x00000006,0x00000007,0x00000008] ; X64-AVX512-NEXT: retq %1 = load <4 x i32>, <4 x i32> *%p %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> @@ -59,7 +59,7 @@ ; X64-AVX512-LABEL: test_broadcast_8i16_16i16: ; X64-AVX512: ## %bb.0: ; X64-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X64-AVX512-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpaddw {{.*#+}} ymm0 = ymm0 + [0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,0x0008,0x0009,0x000a,0x000b,0x000c,0x000d,0x000e,0x000f,0x0010] ; X64-AVX512-NEXT: retq %1 = load <8 x i16>, <8 x i16> *%p %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <16 x i32> @@ -71,7 +71,7 @@ ; X64-AVX512-LABEL: test_broadcast_16i8_32i8: ; X64-AVX512: ## %bb.0: ; X64-AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X64-AVX512-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0 +; X64-AVX512-NEXT: vpaddb {{.*#+}} ymm0 = ymm0 + [0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20] ; X64-AVX512-NEXT: retq %1 = load <16 x i8>, <16 x i8> *%p %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32> @@ -99,7 +99,7 @@ ; X64-AVX512-LABEL: test_broadcast_2i64_8i64: ; X64-AVX512: ## %bb.0: ; X64-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; X64-AVX512-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0 +; X64-AVX512-NEXT: vpaddq {{.*#+}} zmm0 = zmm0 + [0x0000000000000001,0x0000000000000002,0x0000000000000003,0x0000000000000004,0x0000000000000005,0x0000000000000006,0x0000000000000007,0x0000000000000008] ; X64-AVX512-NEXT: retq %1 = load <2 x i64>, <2 x i64> *%p %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <8 x i32> @@ -123,7 +123,7 @@ ; X64-AVX512-LABEL: test_broadcast_4i32_16i32: ; X64-AVX512: ## %bb.0: ; X64-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; X64-AVX512-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0 +; X64-AVX512-NEXT: vpaddd {{.*#+}} zmm0 = zmm0 + [0x00000001,0x00000002,0x00000003,0x00000004,0x00000005,0x00000006,0x00000007,0x00000008,0x00000009,0x0000000a,0x0000000b,0x0000000c,0x0000000d,0x0000000e,0x0000000f,0x00000010] ; X64-AVX512-NEXT: retq %1 = load <4 x i32>, <4 x i32> *%p %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> @@ -135,21 +135,21 @@ ; X64-AVX512VL-LABEL: test_broadcast_8i16_32i16: ; X64-AVX512VL: ## %bb.0: ; X64-AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] -; X64-AVX512VL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm0 -; X64-AVX512VL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm1 +; X64-AVX512VL-NEXT: vpaddw {{.*#+}} ymm0 = ymm1 + [0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,0x0008,0x0009,0x000a,0x000b,0x000c,0x000d,0x000e,0x000f,0x0010] +; X64-AVX512VL-NEXT: vpaddw {{.*#+}} ymm1 = ymm1 + [0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,0x0018,0x0019,0x001a,0x001b,0x001c,0x001d,0x001e,0x001f,0x0020] ; X64-AVX512VL-NEXT: retq ; ; X64-AVX512BWVL-LABEL: test_broadcast_8i16_32i16: ; X64-AVX512BWVL: ## %bb.0: ; X64-AVX512BWVL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; X64-AVX512BWVL-NEXT: vpaddw {{.*}}(%rip), %zmm0, %zmm0 +; X64-AVX512BWVL-NEXT: vpaddw {{.*#+}} zmm0 = zmm0 + 
[0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,0x0008,0x0009,0x000a,0x000b,0x000c,0x000d,0x000e,0x000f,0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,0x0018,0x0019,0x001a,0x001b,0x001c,0x001d,0x001e,0x001f,0x0020]
; X64-AVX512BWVL-NEXT: retq
;
; X64-AVX512DQVL-LABEL: test_broadcast_8i16_32i16:
; X64-AVX512DQVL: ## %bb.0:
; X64-AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1]
-; X64-AVX512DQVL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm0
-; X64-AVX512DQVL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX512DQVL-NEXT: vpaddw {{.*#+}} ymm0 = ymm1 + [0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,0x0008,0x0009,0x000a,0x000b,0x000c,0x000d,0x000e,0x000f,0x0010]
+; X64-AVX512DQVL-NEXT: vpaddw {{.*#+}} ymm1 = ymm1 + [0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,0x0018,0x0019,0x001a,0x001b,0x001c,0x001d,0x001e,0x001f,0x0020]
; X64-AVX512DQVL-NEXT: retq
%1 = load <8 x i16>, <8 x i16> *%p
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <32 x i32>
@@ -161,21 +161,21 @@
; X64-AVX512VL-LABEL: test_broadcast_16i8_64i8:
; X64-AVX512VL: ## %bb.0:
; X64-AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1]
-; X64-AVX512VL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm0
-; X64-AVX512VL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX512VL-NEXT: vpaddb {{.*#+}} ymm0 = ymm1 + [0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20]
+; X64-AVX512VL-NEXT: vpaddb {{.*#+}} ymm1 = ymm1 + [0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,0x40]
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512BWVL-LABEL: test_broadcast_16i8_64i8:
; X64-AVX512BWVL: ## %bb.0:
; X64-AVX512BWVL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X64-AVX512BWVL-NEXT: vpaddb {{.*}}(%rip), %zmm0, %zmm0
+; X64-AVX512BWVL-NEXT: vpaddb {{.*#+}} zmm0 = zmm0 + [0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,0x40]
; X64-AVX512BWVL-NEXT: retq
;
; X64-AVX512DQVL-LABEL: test_broadcast_16i8_64i8:
; X64-AVX512DQVL: ## %bb.0:
; X64-AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1]
-; X64-AVX512DQVL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm0
-; X64-AVX512DQVL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX512DQVL-NEXT: vpaddb {{.*#+}} ymm0 = ymm1 + [0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20]
+; X64-AVX512DQVL-NEXT: vpaddb {{.*#+}} ymm1 = ymm1 + [0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,0x40]
; X64-AVX512DQVL-NEXT: retq
%1 = load <16 x i8>, <16 x i8> *%p
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <64 x i32>
Index: test/CodeGen/X86/avx512-vbroadcasti256.ll
===================================================================
--- test/CodeGen/X86/avx512-vbroadcasti256.ll
+++ test/CodeGen/X86/avx512-vbroadcasti256.ll
@@ -19,7 +19,7 @@
; X64-AVX512-LABEL: test_broadcast_4i64_8i64:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
-; X64-AVX512-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
+; X64-AVX512-NEXT: vpaddq {{.*#+}} zmm0 = zmm0 + [0x0000000000000001,0x0000000000000002,0x0000000000000003,0x0000000000000004,0x0000000000000005,0x0000000000000006,0x0000000000000007,0x0000000000000008]
; X64-AVX512-NEXT: retq
%1 = load <4 x i64>, <4 x i64> *%p
%2 = shufflevector <4 x i64> %1, <4 x i64> undef, <8 x i32>
@@ -43,7 +43,7 @@
; X64-AVX512-LABEL: test_broadcast_8i32_16i32:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
-; X64-AVX512-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
+; X64-AVX512-NEXT: vpaddd {{.*#+}} zmm0 = zmm0 + [0x00000001,0x00000002,0x00000003,0x00000004,0x00000005,0x00000006,0x00000007,0x00000008,0x00000009,0x0000000a,0x0000000b,0x0000000c,0x0000000d,0x0000000e,0x0000000f,0x00000010]
; X64-AVX512-NEXT: retq
%1 = load <8 x i32>, <8 x i32> *%p
%2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32>
@@ -55,21 +55,21 @@
; X64-AVX512VL-LABEL: test_broadcast_16i16_32i16:
; X64-AVX512VL: ## %bb.0:
; X64-AVX512VL-NEXT: vmovdqa (%rdi), %ymm1
-; X64-AVX512VL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm0
-; X64-AVX512VL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX512VL-NEXT: vpaddw {{.*#+}} ymm0 = ymm1 + [0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,0x0008,0x0009,0x000a,0x000b,0x000c,0x000d,0x000e,0x000f,0x0010]
+; X64-AVX512VL-NEXT: vpaddw {{.*#+}} ymm1 = ymm1 + [0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,0x0018,0x0019,0x001a,0x001b,0x001c,0x001d,0x001e,0x001f,0x0020]
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512BWVL-LABEL: test_broadcast_16i16_32i16:
; X64-AVX512BWVL: ## %bb.0:
; X64-AVX512BWVL-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
-; X64-AVX512BWVL-NEXT: vpaddw {{.*}}(%rip), %zmm0, %zmm0
+; X64-AVX512BWVL-NEXT: vpaddw {{.*#+}} zmm0 = zmm0 + [0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,0x0008,0x0009,0x000a,0x000b,0x000c,0x000d,0x000e,0x000f,0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,0x0018,0x0019,0x001a,0x001b,0x001c,0x001d,0x001e,0x001f,0x0020]
; X64-AVX512BWVL-NEXT: retq
;
; X64-AVX512DQVL-LABEL: test_broadcast_16i16_32i16:
; X64-AVX512DQVL: ## %bb.0:
; X64-AVX512DQVL-NEXT: vmovdqa (%rdi), %ymm1
-; X64-AVX512DQVL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm0
-; X64-AVX512DQVL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX512DQVL-NEXT: vpaddw {{.*#+}} ymm0 = ymm1 + [0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,0x0008,0x0009,0x000a,0x000b,0x000c,0x000d,0x000e,0x000f,0x0010]
+; X64-AVX512DQVL-NEXT: vpaddw {{.*#+}} ymm1 = ymm1 + [0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,0x0018,0x0019,0x001a,0x001b,0x001c,0x001d,0x001e,0x001f,0x0020]
; X64-AVX512DQVL-NEXT: retq
%1 = load <16 x i16>, <16 x i16> *%p
%2 = shufflevector <16 x i16> %1, <16 x i16> undef, <32 x i32>
@@ -81,21 +81,21 @@
; X64-AVX512VL-LABEL: test_broadcast_32i8_64i8:
; X64-AVX512VL: ## %bb.0:
; X64-AVX512VL-NEXT: vmovdqa (%rdi), %ymm1
-; X64-AVX512VL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm0
-; X64-AVX512VL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX512VL-NEXT: vpaddb {{.*#+}} ymm0 = ymm1 + [0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20]
+; X64-AVX512VL-NEXT: vpaddb {{.*#+}} ymm1 = ymm1 + [0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,0x40]
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512BWVL-LABEL: test_broadcast_32i8_64i8:
; X64-AVX512BWVL: ## %bb.0:
; X64-AVX512BWVL-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
-; X64-AVX512BWVL-NEXT: vpaddb {{.*}}(%rip), %zmm0, %zmm0
+; X64-AVX512BWVL-NEXT: vpaddb {{.*#+}} zmm0 = zmm0 + [0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,0x40]
; X64-AVX512BWVL-NEXT: retq
;
; X64-AVX512DQVL-LABEL: test_broadcast_32i8_64i8:
; X64-AVX512DQVL: ## %bb.0:
; X64-AVX512DQVL-NEXT: vmovdqa (%rdi), %ymm1
-; X64-AVX512DQVL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm0
-; X64-AVX512DQVL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm1
+; X64-AVX512DQVL-NEXT: vpaddb {{.*#+}} ymm0 = ymm1 + [0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20]
+; X64-AVX512DQVL-NEXT: vpaddb {{.*#+}} ymm1 = ymm1 + [0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,0x40]
; X64-AVX512DQVL-NEXT: retq
%1 = load <32 x i8>, <32 x i8> *%p
%2 = shufflevector <32 x i8> %1, <32 x i8> undef, <64 x i32>
Index: test/CodeGen/X86/avx512-vec-cmp.ll
===================================================================
--- test/CodeGen/X86/avx512-vec-cmp.ll
+++ test/CodeGen/X86/avx512-vec-cmp.ll
@@ -941,7 +941,8 @@
; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc2,0xc1,0x00]
; AVX512-NEXT: vpmovzxdq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x35,0xc0]
; AVX512-NEXT: ## xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
+; AVX512-NEXT: vpand {{.*#+}} xmm0 = xmm0 & [0x0000000000000001 splat]
+; AVX512-NEXT: ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
; AVX512-NEXT: ## fixup A - offset: 4, value: LCPI47_0-4, kind: reloc_riprel_4byte
; AVX512-NEXT: retq ## encoding: [0xc3]
;
Index: test/CodeGen/X86/avx512-vpclmulqdq.ll
===================================================================
--- test/CodeGen/X86/avx512-vpclmulqdq.ll
+++ test/CodeGen/X86/avx512-vpclmulqdq.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+vpclmulqdq -show-mc-encoding | FileCheck %s --check-prefix=AVX512_VPCLMULQDQ
define <8 x i64> @test_x86_pclmulqdq(<8 x i64> %a0, <8 x i64> %a1) {
Index: test/CodeGen/X86/vector-shift-shl-128.ll
===================================================================
--- test/CodeGen/X86/vector-shift-shl-128.ll
+++ test/CodeGen/X86/vector-shift-shl-128.ll
@@ -85,7 +85,7 @@
; SSE2-LABEL: var_shift_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pslld $23, %xmm1
-; SSE2-NEXT: paddd {{.*}}(%rip), %xmm1
+; SSE2-NEXT: paddd {{.*#+}} xmm1 = xmm1 + [0x3f800000 splat]
; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm0, %xmm1
@@ -100,7 +100,7 @@
; SSE41-LABEL: var_shift_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pslld $23, %xmm1
-; SSE41-NEXT: paddd {{.*}}(%rip), %xmm1
+; SSE41-NEXT: paddd {{.*#+}} xmm1 = xmm1 + [0x3f800000 splat]
; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
; SSE41-NEXT: pmulld %xmm1, %xmm0
; SSE41-NEXT: retq
@@ -108,7 +108,7 @@
; AVX1-LABEL: var_shift_v4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
-; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpaddd {{.*#+}} xmm1 = xmm1 + [0x3f800000 splat]
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT: vpmulld %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
@@ -141,7 +141,7 @@
; X32-SSE-LABEL: var_shift_v4i32:
; X32-SSE: # %bb.0:
; X32-SSE-NEXT: pslld $23, %xmm1
-; X32-SSE-NEXT: paddd {{\.LCPI.*}}, %xmm1
+; X32-SSE-NEXT: paddd {{.*#+}} xmm1 = xmm1 + [0x3f800000 splat]
; X32-SSE-NEXT: cvttps2dq %xmm1, %xmm1
; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; X32-SSE-NEXT: pmuludq %xmm0, %xmm1
@@ -337,7 +337,7 @@
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: pandn %xmm0, %xmm4
; SSE2-NEXT: psllw $4, %xmm0
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pand {{.*#+}} xmm0 = xmm0 & [0xf0f0f0f0f0f0f0f0 splat]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: paddb %xmm1, %xmm1
@@ -346,7 +346,7 @@
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: pandn %xmm0, %xmm4
; SSE2-NEXT: psllw $2, %xmm0
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pand {{.*#+}} xmm0 = xmm0 & [0xfcfcfcfcfcfcfcfc splat]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: paddb %xmm1, %xmm1
@@ -364,12 +364,12 @@
; SSE41-NEXT: psllw $5, %xmm1
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: psllw $4, %xmm3
-; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
+; SSE41-NEXT: pand {{.*#+}} xmm3 = xmm3 & [0xf0f0f0f0f0f0f0f0 splat]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm3
; SSE41-NEXT: psllw $2, %xmm3
-; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
+; SSE41-NEXT: pand {{.*#+}} xmm3 = xmm3 & [0xfcfcfcfcfcfcfcfc splat]
; SSE41-NEXT: paddb %xmm1, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
@@ -385,10 +385,10 @@
; AVX: # %bb.0:
; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
; AVX-NEXT: vpsllw $4, %xmm0, %xmm2
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT: vpand {{.*#+}} xmm2 = xmm2 & [0xf0f0f0f0f0f0f0f0 splat]
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpsllw $2, %xmm0, %xmm2
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT: vpand {{.*#+}} xmm2 = xmm2 & [0xfcfcfcfcfcfcfcfc splat]
; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2
@@ -447,7 +447,7 @@
; X32-SSE-NEXT: movdqa %xmm3, %xmm4
; X32-SSE-NEXT: pandn %xmm0, %xmm4
; X32-SSE-NEXT: psllw $4, %xmm0
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
+; X32-SSE-NEXT: pand {{.*#+}} xmm0 = xmm0 & [0xf0f0f0f0f0f0f0f0 splat]
; X32-SSE-NEXT: pand %xmm3, %xmm0
; X32-SSE-NEXT: por %xmm4, %xmm0
; X32-SSE-NEXT: paddb %xmm1, %xmm1
@@ -456,7 +456,7 @@
; X32-SSE-NEXT: movdqa %xmm3, %xmm4
; X32-SSE-NEXT: pandn %xmm0, %xmm4
; X32-SSE-NEXT: psllw $2, %xmm0
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
+; X32-SSE-NEXT: pand {{.*#+}} xmm0 = xmm0 & [0xfcfcfcfcfcfcfcfc splat]
; X32-SSE-NEXT: pand %xmm3, %xmm0
; X32-SSE-NEXT: por %xmm4, %xmm0
; X32-SSE-NEXT: paddb %xmm1, %xmm1
@@ -621,7 +621,7 @@
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: pandn %xmm0, %xmm4
; SSE2-NEXT: psllw $4, %xmm0
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pand {{.*#+}} xmm0 = xmm0 & [0xf0f0f0f0f0f0f0f0 splat]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: paddb %xmm2, %xmm2
@@ -630,7 +630,7 @@
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: pandn %xmm0, %xmm4
; SSE2-NEXT: psllw $2, %xmm0
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pand {{.*#+}} xmm0 = xmm0 & [0xfcfcfcfcfcfcfcfc splat]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: paddb %xmm2, %xmm2
@@ -652,12 +652,12 @@
; SSE41-NEXT: paddb %xmm1, %xmm3
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: psllw $4, %xmm4
-; SSE41-NEXT: pand {{.*}}(%rip), %xmm4
+; SSE41-NEXT: pand {{.*#+}} xmm4 = xmm4 & [0xf0f0f0f0f0f0f0f0 splat]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm1
; SSE41-NEXT: psllw $2, %xmm1
-; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE41-NEXT: pand {{.*#+}} xmm1 = xmm1 & [0xfcfcfcfcfcfcfcfc splat]
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm1
@@ -675,10 +675,10 @@
; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm2
; AVX1-NEXT: vpsllw $4, %xmm0, %xmm3
-; AVX1-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
+; AVX1-NEXT: vpand {{.*#+}} xmm3 = xmm3 & [0xf0f0f0f0f0f0f0f0 splat]
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $2, %xmm0, %xmm1
-; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpand {{.*#+}} xmm1 = xmm1 & [0xfcfcfcfcfcfcfcfc splat]
; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm1
; AVX1-NEXT: vpaddb %xmm2, %xmm2, %xmm2
@@ -690,10 +690,10 @@
; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX2-NEXT: vpsllw $5, %xmm1, %xmm1
; AVX2-NEXT: vpsllw $4, %xmm0, %xmm2
-; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX2-NEXT: vpand {{.*#+}} xmm2 = xmm2 & [0xf0f0f0f0f0f0f0f0 splat]
; AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsllw $2, %xmm0, %xmm2
-; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX2-NEXT: vpand {{.*#+}} xmm2 = xmm2 & [0xfcfcfcfcfcfcfcfc splat]
; AVX2-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpaddb %xmm0, %xmm0, %xmm2
@@ -767,7 +767,7 @@
; X32-SSE-NEXT: movdqa %xmm3, %xmm4
; X32-SSE-NEXT: pandn %xmm0, %xmm4
; X32-SSE-NEXT: psllw $4, %xmm0
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
+; X32-SSE-NEXT: pand {{.*#+}} xmm0 = xmm0 & [0xf0f0f0f0f0f0f0f0 splat]
; X32-SSE-NEXT: pand %xmm3, %xmm0
; X32-SSE-NEXT: por %xmm4, %xmm0
; X32-SSE-NEXT: paddb %xmm2, %xmm2
@@ -776,7 +776,7 @@
; X32-SSE-NEXT: movdqa %xmm3, %xmm4
; X32-SSE-NEXT: pandn %xmm0, %xmm4
; X32-SSE-NEXT: psllw $2, %xmm0
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
+; X32-SSE-NEXT: pand {{.*#+}} xmm0 = xmm0 & [0xfcfcfcfcfcfcfcfc splat]
; X32-SSE-NEXT: pand %xmm3, %xmm0
; X32-SSE-NEXT: por %xmm4, %xmm0
; X32-SSE-NEXT: paddb %xmm2, %xmm2
@@ -977,7 +977,7 @@
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: pandn %xmm0, %xmm4
; SSE2-NEXT: psllw $4, %xmm0
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pand {{.*#+}} xmm0 = xmm0 & [0xf0f0f0f0f0f0f0f0 splat]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: paddb %xmm2, %xmm2
@@ -986,7 +986,7 @@
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: pandn %xmm0, %xmm4
; SSE2-NEXT: psllw $2, %xmm0
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pand {{.*#+}} xmm0 = xmm0 & [0xfcfcfcfcfcfcfcfc splat]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: paddb %xmm2, %xmm2
@@ -1003,12 +1003,12 @@
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psllw $4, %xmm2
-; SSE41-NEXT: pand {{.*}}(%rip), %xmm2
+; SSE41-NEXT: pand {{.*#+}} xmm2 = xmm2 & [0xf0f0f0f0f0f0f0f0 splat]
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8192,24640,41088,57536,49376,32928,16480,32]
; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: psllw $2, %xmm2
-; SSE41-NEXT: pand {{.*}}(%rip), %xmm2
+; SSE41-NEXT: pand {{.*#+}} xmm2 = xmm2 & [0xfcfcfcfcfcfcfcfc splat]
; SSE41-NEXT: paddb %xmm0, %xmm0
; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm2
@@ -1021,11 +1021,11 @@
; AVX-LABEL: constant_shift_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpsllw $4, %xmm0, %xmm1
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpand {{.*#+}} xmm1 = xmm1 & [0xf0f0f0f0f0f0f0f0 splat]
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32]
; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsllw $2, %xmm0, %xmm1
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpand {{.*#+}} xmm1 = xmm1 & [0xfcfcfcfcfcfcfcfc splat]
; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2
; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm1
@@ -1081,7 +1081,7 @@
; X32-SSE-NEXT: movdqa %xmm3, %xmm4
; X32-SSE-NEXT: pandn %xmm0, %xmm4
; X32-SSE-NEXT: psllw $4, %xmm0
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
+; X32-SSE-NEXT: pand {{.*#+}} xmm0 = xmm0 & [0xf0f0f0f0f0f0f0f0 splat]
; X32-SSE-NEXT: pand %xmm3, %xmm0
; X32-SSE-NEXT: por %xmm4, %xmm0
; X32-SSE-NEXT: paddb %xmm2, %xmm2
@@ -1090,7 +1090,7 @@
; X32-SSE-NEXT: movdqa %xmm3, %xmm4
; X32-SSE-NEXT: pandn %xmm0, %xmm4
; X32-SSE-NEXT: psllw $2, %xmm0
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
+; X32-SSE-NEXT: pand {{.*#+}} xmm0 = xmm0 & [0xfcfcfcfcfcfcfcfc splat]
; X32-SSE-NEXT: pand %xmm3, %xmm0
; X32-SSE-NEXT: por %xmm4, %xmm0
; X32-SSE-NEXT: paddb %xmm2, %xmm2
@@ -1215,13 +1215,13 @@
; SSE-LABEL: splatconstant_shift_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: psllw $3, %xmm0
-; SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE-NEXT: pand {{.*#+}} xmm0 = xmm0 & [0xf8f8f8f8f8f8f8f8 splat]
; SSE-NEXT: retq
;
; AVX-LABEL: splatconstant_shift_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpand {{.*#+}} xmm0 = xmm0 & [0xf8f8f8f8f8f8f8f8 splat]
; AVX-NEXT: retq
;
; XOP-LABEL: splatconstant_shift_v16i8:
@@ -1232,19 +1232,19 @@
; AVX512-LABEL: splatconstant_shift_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpand {{.*#+}} xmm0 = xmm0 & [0xf8f8f8f8f8f8f8f8 splat]
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_shift_v16i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpand {{.*#+}} xmm0 = xmm0 & [0xf8f8f8f8f8f8f8f8 splat]
; AVX512VL-NEXT: retq
;
; X32-SSE-LABEL: splatconstant_shift_v16i8:
; X32-SSE: # %bb.0:
; X32-SSE-NEXT: psllw $3, %xmm0
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
+; X32-SSE-NEXT: pand {{.*#+}} xmm0 = xmm0 & [0xf8f8f8f8f8f8f8f8 splat]
; X32-SSE-NEXT: retl
%shift = shl <16 x i8> %a,
ret <16 x i8> %shift
Index: test/CodeGen/X86/vector-shift-shl-256.ll
===================================================================
--- test/CodeGen/X86/vector-shift-shl-256.ll
+++ test/CodeGen/X86/vector-shift-shl-256.ll
@@ -339,10 +339,10 @@
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX2-NEXT: vpsllw $4, %ymm0, %ymm2
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT: vpand {{.*#+}} ymm2 = ymm2 & [0xf0f0f0f0f0f0f0f0 splat]
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT: vpand {{.*#+}} ymm2 = ymm2 & [0xfcfcfcfcfcfcfcfc splat]
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2
@@ -372,10 +372,10 @@
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm2
-; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpand {{.*#+}} ymm2 = ymm2 & [0xf0f0f0f0f0f0f0f0 splat]
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpand {{.*#+}} ymm2 = ymm2 & [0xfcfcfcfcfcfcfcfc splat]
; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm2
@@ -395,10 +395,10 @@
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512DQVL-NEXT: vpsllw $4, %ymm0, %ymm2
-; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQVL-NEXT: vpand {{.*#+}} ymm2 = ymm2 & [0xf0f0f0f0f0f0f0f0 splat]
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQVL-NEXT: vpand {{.*#+}} ymm2 = ymm2 & [0xfcfcfcfcfcfcfcfc splat]
; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
@@ -449,10 +449,10 @@
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
; X32-AVX2-NEXT: vpsllw $4, %ymm0, %ymm2
-; X32-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2
+; X32-AVX2-NEXT: vpand {{.*#+}} ymm2 = ymm2 & [0xf0f0f0f0f0f0f0f0 splat]
; X32-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-AVX2-NEXT: vpsllw $2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2
+; X32-AVX2-NEXT: vpand {{.*#+}} ymm2 = ymm2 & [0xfcfcfcfcfcfcfcfc splat]
; X32-AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; X32-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2
@@ -679,11 +679,11 @@
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX2-NEXT: vpsllw $4, %ymm0, %ymm2
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT: vpand {{.*#+}} ymm2 = ymm2 & [0xf0f0f0f0f0f0f0f0 splat]
; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT: vpand {{.*#+}} ymm2 = ymm2 & [0xfcfcfcfcfcfcfcfc splat]
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2
@@ -715,11 +715,11 @@
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm2
-; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpand {{.*#+}} ymm2 = ymm2 & [0xf0f0f0f0f0f0f0f0 splat]
; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQ-NEXT: vpand {{.*#+}} ymm2 = ymm2 & [0xfcfcfcfcfcfcfcfc splat]
; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm2
@@ -740,11 +740,11 @@
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512DQVL-NEXT: vpsllw $4, %ymm0, %ymm2
-; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQVL-NEXT: vpand {{.*#+}} ymm2 = ymm2 & [0xf0f0f0f0f0f0f0f0 splat]
; AVX512DQVL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsllw $2, %ymm0, %ymm2
-; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512DQVL-NEXT: vpand {{.*#+}} ymm2 = ymm2 & [0xfcfcfcfcfcfcfcfc splat]
; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
@@ -794,11 +794,11 @@
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
; X32-AVX2-NEXT: vpsllw $4, %ymm0, %ymm2
-; X32-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2
+; X32-AVX2-NEXT: vpand {{.*#+}} ymm2 = ymm2 & [0xf0f0f0f0f0f0f0f0 splat]
; X32-AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
; X32-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-AVX2-NEXT: vpsllw $2, %ymm0, %ymm2
-; X32-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2
+; X32-AVX2-NEXT: vpand {{.*#+}} ymm2 = ymm2 & [0xfcfcfcfcfcfcfcfc splat]
; X32-AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; X32-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2
@@ -1029,11 +1029,11 @@
; AVX2-LABEL: constant_shift_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $4, %ymm0, %ymm1
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vpand {{.*#+}} ymm1 = ymm1 & [0xf0f0f0f0f0f0f0f0 splat]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsllw $2, %ymm0, %ymm1
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vpand {{.*#+}} ymm1 = ymm1 & [0xfcfcfcfcfcfcfcfc splat]
; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm1
@@ -1062,11 +1062,11 @@
; AVX512DQ-LABEL: constant_shift_v32i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm1
-; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512DQ-NEXT: vpand {{.*#+}} ymm1 = ymm1 & [0xf0f0f0f0f0f0f0f0 splat]
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm1
-; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512DQ-NEXT: vpand {{.*#+}} ymm1 = ymm1 & [0xfcfcfcfcfcfcfcfc splat]
; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm1
@@ -1084,11 +1084,11 @@
; AVX512DQVL-LABEL: constant_shift_v32i8:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpsllw $4, %ymm0, %ymm1
-; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512DQVL-NEXT: vpand {{.*#+}} ymm1 = ymm1 & [0xf0f0f0f0f0f0f0f0 splat]
; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsllw $2, %ymm0, %ymm1
-; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX512DQVL-NEXT: vpand {{.*#+}} ymm1 = ymm1 & [0xfcfcfcfcfcfcfcfc splat]
; AVX512DQVL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpaddb %ymm0, %ymm0, %ymm1
@@ -1133,11 +1133,11 @@
; X32-AVX2-LABEL: constant_shift_v32i8:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpsllw $4, %ymm0, %ymm1
-; X32-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm1, %ymm1
+; X32-AVX2-NEXT: vpand {{.*#+}} ymm1 = ymm1 & [0xf0f0f0f0f0f0f0f0 splat]
; X32-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; X32-AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: vpsllw $2, %ymm0, %ymm1
-; X32-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm1, %ymm1
+; X32-AVX2-NEXT: vpand {{.*#+}} ymm1 = ymm1 & [0xfcfcfcfcfcfcfcfc splat]
; X32-AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; X32-AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm1
@@ -1326,7 +1326,7 @@
; AVX2-LABEL: splatconstant_shift_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $3, %ymm0, %ymm0
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpand {{.*#+}} ymm0 = ymm0 & [0xf8f8f8f8f8f8f8f8 splat]
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_shift_v32i8:
@@ -1341,19 +1341,19 @@
; XOPAVX2-LABEL: splatconstant_shift_v32i8:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsllw $3, %ymm0, %ymm0
-; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: vpand {{.*#+}} ymm0 = ymm0 & [0xf8f8f8f8f8f8f8f8 splat]
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatconstant_shift_v32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllw $3, %ymm0, %ymm0
-; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: vpand {{.*#+}} ymm0 = ymm0 & [0xf8f8f8f8f8f8f8f8 splat]
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_shift_v32i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT: vpand {{.*#+}} ymm0 = ymm0 & [0xf8f8f8f8f8f8f8f8 splat]
; AVX512VL-NEXT: retq
;
; X32-AVX1-LABEL: splatconstant_shift_v32i8:
@@ -1370,7 +1370,7 @@
; X32-AVX2-LABEL: splatconstant_shift_v32i8:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpsllw $3, %ymm0, %ymm0
-; X32-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
+; X32-AVX2-NEXT: vpand {{.*#+}} ymm0 = ymm0 & [0xf8f8f8f8f8f8f8f8 splat]
; X32-AVX2-NEXT: retl
%shift = shl <32 x i8> %a,
ret <32 x i8> %shift
Index: test/CodeGen/X86/vector-shift-shl-512.ll
===================================================================
--- test/CodeGen/X86/vector-shift-shl-512.ll
+++ test/CodeGen/X86/vector-shift-shl-512.ll
@@ -77,12 +77,12 @@
; AVX512BW-LABEL: var_shift_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpandq {{.*#+}} zmm2 = zmm2 & [0xf0f0f0f0f0f0f0f0 splat]
; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm2
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpandq {{.*#+}} zmm2 = zmm2 & [0xfcfcfcfcfcfcfcfc splat]
; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
@@ -168,12 +168,12 @@
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpandq {{.*#+}} zmm2 = zmm2 & [0xf0f0f0f0f0f0f0f0 splat]
; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm2
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpandq {{.*#+}} zmm2 = zmm2 & [0xfcfcfcfcfcfcfcfc splat]
; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
@@ -255,10 +255,10 @@
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpandq {{.*#+}} zmm2 = zmm2 & [0xf0f0f0f0f0f0f0f0 splat]
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm2
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpandq {{.*#+}} zmm2 = zmm2 & [0xfcfcfcfcfcfcfcfc splat]
; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
@@ -320,7 +320,7 @@
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpandq {{.*#+}} zmm0 = zmm0 & [0xf8f8f8f8f8f8f8f8 splat]
; AVX512BW-NEXT: retq
%shift = shl <64 x i8> %a,
ret <64 x i8> %shift