Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -4427,6 +4427,40 @@
   return ConstsNode;
 }
 
+static SDValue getConstVector(ArrayRef<APInt> Values, SmallBitVector &Undefs,
+                              MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
+  assert(Values.size() == Undefs.size() && "Unequal constant and undef arrays");
+  SmallVector<SDValue, 32> Ops;
+  bool Split = false;
+
+  MVT ConstVecVT = VT;
+  unsigned NumElts = VT.getVectorNumElements();
+  bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
+  if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
+    ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
+    Split = true;
+  }
+
+  MVT EltVT = ConstVecVT.getVectorElementType();
+  for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+    if (Undefs[i]) {
+      Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
+      continue;
+    }
+    const APInt &V = Values[i];
+    assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes");
+    if (Split) {
+      Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
+      Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
+    } else {
+      Ops.push_back(DAG.getConstant(V, dl, EltVT));
+    }
+  }
+
+  SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
+  return DAG.getBitcast(VT, ConstsNode);
+}
+
 /// Returns a vector of specified type with all zero elements.
 static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
                              SelectionDAG &DAG, const SDLoc &dl) {
@@ -31817,10 +31851,11 @@
   return OptimizeConditionalInDecrement(N, DAG);
 }
 
-static SDValue combineVZext(SDNode *N, SelectionDAG &DAG,
-                            TargetLowering::DAGCombinerInfo &DCI,
-                            const X86Subtarget &Subtarget) {
+static SDValue combineVSZext(SDNode *N, SelectionDAG &DAG,
+                             TargetLowering::DAGCombinerInfo &DCI,
+                             const X86Subtarget &Subtarget) {
   SDLoc DL(N);
+  unsigned Opcode = N->getOpcode();
   MVT VT = N->getSimpleValueType(0);
   MVT SVT = VT.getVectorElementType();
   SDValue Op = N->getOperand(0);
@@ -31830,24 +31865,26 @@
 
   // Perform any constant folding.
   if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
-    SmallVector<SDValue, 4> Vals;
-    for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+    unsigned NumDstElts = VT.getVectorNumElements();
+    SmallBitVector Undefs(NumDstElts, false);
+    SmallVector<APInt, 4> Vals(NumDstElts, APInt(SVT.getSizeInBits(), 0));
+    for (unsigned i = 0; i != NumDstElts; ++i) {
      SDValue OpElt = Op.getOperand(i);
      if (OpElt.getOpcode() == ISD::UNDEF) {
-        Vals.push_back(DAG.getUNDEF(SVT));
+        Undefs[i] = true;
        continue;
      }
      APInt Cst = cast<ConstantSDNode>(OpElt.getNode())->getAPIntValue();
-      assert(Cst.getBitWidth() == OpEltVT.getSizeInBits());
-      Cst = Cst.zextOrTrunc(SVT.getSizeInBits());
-      Vals.push_back(DAG.getConstant(Cst, DL, SVT));
+      Vals[i] = Opcode == X86ISD::VZEXT ? Cst.zextOrTrunc(SVT.getSizeInBits())
+                                        : Cst.sextOrTrunc(SVT.getSizeInBits());
    }
-    return DAG.getBuildVector(VT, DL, Vals);
+    return getConstVector(Vals, Undefs, VT, DAG, DL);
  }
 
   // (vzext (bitcast (vzext (x)) -> (vzext x)
+  // TODO: (vsext (bitcast (vsext (x)) -> (vsext x)
   SDValue V = peekThroughBitcasts(Op);
-  if (V != Op && V.getOpcode() == X86ISD::VZEXT) {
+  if (Opcode == X86ISD::VZEXT && V != Op && V.getOpcode() == X86ISD::VZEXT) {
    MVT InnerVT = V.getSimpleValueType();
    MVT InnerEltVT = InnerVT.getVectorElementType();
 
@@ -31872,7 +31909,9 @@
   // Check if we can bypass extracting and re-inserting an element of an input
   // vector.  Essentially:
   // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
-  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+  // TODO: Add X86ISD::VSEXT support
+  if (Opcode == X86ISD::VZEXT &&
+      V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
      V.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      V.getOperand(0).getSimpleValueType().getSizeInBits() == InputBits) {
    SDValue ExtractedV = V.getOperand(0);
@@ -31994,7 +32033,8 @@
   case ISD::SETCC:          return combineSetCC(N, DAG, Subtarget);
   case X86ISD::SETCC:       return combineX86SetCC(N, DAG, DCI, Subtarget);
   case X86ISD::BRCOND:      return combineBrCond(N, DAG, DCI, Subtarget);
-  case X86ISD::VZEXT:       return combineVZext(N, DAG, DCI, Subtarget);
+  case X86ISD::VSEXT:
+  case X86ISD::VZEXT:       return combineVSZext(N, DAG, DCI, Subtarget);
   case X86ISD::SHUFP:       // Handle all target specific shuffles
   case X86ISD::INSERTPS:
   case X86ISD::PALIGNR:
Index: test/CodeGen/X86/fold-vector-sext-zext.ll
===================================================================
--- test/CodeGen/X86/fold-vector-sext-zext.ll
+++ test/CodeGen/X86/fold-vector-sext-zext.ll
@@ -83,9 +83,8 @@
 define <4 x i64> @test_sext_4i8_4i64() {
 ; X32-LABEL: test_sext_4i8_4i64:
 ; X32:       # BB#0:
-; X32-NEXT:    vpmovsxbq {{\.LCPI.*}}, %xmm0
-; X32-NEXT:    vpmovsxbq {{\.LCPI.*}}, %xmm1
-; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,4294967295,4294967295]
+; X32-NEXT:    vinsertf128 $1, {{\.LCPI.*}}, %ymm0, %ymm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_sext_4i8_4i64:
@@ -104,9 +103,7 @@
 ; X32-LABEL: test_sext_4i8_4i64_undef:
 ; X32:       # BB#0:
 ; X32-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-NEXT:    vpmovsxbq %xmm0, %xmm0
-; X32-NEXT:    vpmovsxbq {{\.LCPI.*}}, %xmm1
-; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X32-NEXT:    vinsertf128 $1, {{\.LCPI.*}}, %ymm0, %ymm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_sext_4i8_4i64_undef:
Index: test/CodeGen/X86/pmul.ll
===================================================================
--- test/CodeGen/X86/pmul.ll
+++ test/CodeGen/X86/pmul.ll
@@ -26,7 +26,7 @@
 ; SSE41-LABEL: mul_v16i8c:
 ; SSE41:       # BB#0: # %entry
 ; SSE41-NEXT:    pmovsxbw %xmm0, %xmm1
-; SSE41-NEXT:    pmovsxbw {{.*}}(%rip), %xmm2
+; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [117,117,117,117,117,117,117,117]
 ; SSE41-NEXT:    pmullw %xmm2, %xmm1
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
 ; SSE41-NEXT:    pand %xmm3, %xmm1
@@ -41,8 +41,7 @@
 ; AVX2-LABEL: mul_v16i8c:
 ; AVX2:       # BB#0: # %entry
 ; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
-; AVX2-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm1
-; AVX2-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
 ; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
@@ -54,8 +53,7 @@
 ; AVX512F-LABEL: mul_v16i8c:
 ; AVX512F:       # BB#0: # %entry
 ; AVX512F-NEXT:    vpmovsxbw %xmm0, %ymm0
-; AVX512F-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm1
-; AVX512F-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512F-NEXT:    retq
@@ -63,8 +61,7 @@
 ; AVX512BW-LABEL: mul_v16i8c:
 ; AVX512BW:       # BB#0: # %entry
 ; AVX512BW-NEXT:    vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm1
-; AVX512BW-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
 ; AVX512BW-NEXT:    # kill: %XMM0 %XMM0 %YMM0
 ; AVX512BW-NEXT:    retq
@@ -418,7 +415,7 @@
 ; SSE41-LABEL: mul_v32i8c:
 ; SSE41:       # BB#0: # %entry
 ; SSE41-NEXT:    pmovsxbw %xmm0, %xmm2
-; SSE41-NEXT:    pmovsxbw {{.*}}(%rip), %xmm4
+; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [117,117,117,117,117,117,117,117]
 ; SSE41-NEXT:    pmullw %xmm4, %xmm2
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
 ; SSE41-NEXT:    pand %xmm5, %xmm2
@@ -443,7 +440,7 @@
 ; AVX2:       # BB#0: # %entry
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpmovsxbw %xmm1, %ymm1
-; AVX2-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm2
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
 ; AVX2-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
@@ -462,7 +459,7 @@
 ; AVX512F-LABEL: mul_v32i8c:
 ; AVX512F:       # BB#0: # %entry
 ; AVX512F-NEXT:    vpmovsxbw %xmm0, %ymm1
-; AVX512F-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm2
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
 ; AVX512F-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpmovsxwd %ymm1, %zmm1
 ; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
@@ -477,8 +474,7 @@
 ; AVX512BW-LABEL: mul_v32i8c:
 ; AVX512BW:       # BB#0: # %entry
 ; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm0
-; AVX512BW-NEXT:    vpmovsxbw {{.*}}(%rip), %zmm1
-; AVX512BW-NEXT:    vpmullw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpmullw {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
 ; AVX512BW-NEXT:    retq
 entry:
@@ -833,7 +829,7 @@
 ; SSE41-NEXT:    movdqa %xmm1, %xmm4
 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
 ; SSE41-NEXT:    pmovsxbw %xmm1, %xmm0
-; SSE41-NEXT:    pmovsxbw {{.*}}(%rip), %xmm6
+; SSE41-NEXT:    movdqa {{.*#+}} xmm6 = [117,117,117,117,117,117,117,117]
 ; SSE41-NEXT:    pmullw %xmm6, %xmm0
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm7 = [255,255,255,255,255,255,255,255]
 ; SSE41-NEXT:    pand %xmm7, %xmm0
@@ -874,7 +870,7 @@
 ; AVX2:       # BB#0: # %entry
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
 ; AVX2-NEXT:    vpmovsxbw %xmm2, %ymm2
-; AVX2-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm3
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
 ; AVX2-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm4
 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
@@ -907,7 +903,7 @@
 ; AVX512F-LABEL: mul_v64i8c:
 ; AVX512F:       # BB#0: # %entry
 ; AVX512F-NEXT:    vpmovsxbw %xmm0, %ymm2
-; AVX512F-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm3
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
 ; AVX512F-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpmovsxwd %ymm2, %zmm2
 ; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
@@ -932,7 +928,7 @@
 ; AVX512BW-LABEL: mul_v64i8c:
 ; AVX512BW:       # BB#0: # %entry
 ; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm1
-; AVX512BW-NEXT:    vpmovsxbw {{.*}}(%rip), %zmm2
+; AVX512BW-NEXT:    vmovdqu16 {{.*#+}} zmm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
 ; AVX512BW-NEXT:    vpmullw %zmm2, %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpmovwb %zmm1, %ymm1
 ; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
Index: test/CodeGen/X86/vector-idiv-sdiv-128.ll
===================================================================
--- test/CodeGen/X86/vector-idiv-sdiv-128.ll
+++ test/CodeGen/X86/vector-idiv-sdiv-128.ll
@@ -544,7 +544,7 @@
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE41-NEXT:    paddb %xmm2, %xmm1
 ; SSE41-NEXT:    pmovsxbw %xmm1, %xmm2
-; SSE41-NEXT:    pmovsxbw {{.*}}(%rip), %xmm3
+; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7]
 ; SSE41-NEXT:    pmullw %xmm3, %xmm2
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
 ; SSE41-NEXT:    pand %xmm4, %xmm2
@@ -577,7 +577,7 @@
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX1-NEXT:    vpaddb %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vpmovsxbw %xmm1, %xmm2
-; AVX1-NEXT:    vpmovsxbw {{.*}}(%rip), %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7]
 ; AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
 ; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
@@ -607,8 +607,7 @@
 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2-NEXT:    vpaddb %xmm1, %xmm2, %xmm1
 ; AVX2-NEXT:    vpmovsxbw %xmm1, %ymm1
-; AVX2-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm2
-; AVX2-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    vpmullw {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
 ; AVX2-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
Index: test/CodeGen/X86/vector-idiv-sdiv-256.ll
===================================================================
--- test/CodeGen/X86/vector-idiv-sdiv-256.ll
+++ test/CodeGen/X86/vector-idiv-sdiv-256.ll
@@ -459,7 +459,7 @@
 ; AVX1-NEXT:    vpsubb %xmm7, %xmm3, %xmm3
 ; AVX1-NEXT:    vpaddb %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vpmovsxbw %xmm3, %xmm4
-; AVX1-NEXT:    vpmovsxbw {{.*}}(%rip), %xmm5
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [7,7,7,7,7,7,7,7]
 ; AVX1-NEXT:    vpmullw %xmm5, %xmm4, %xmm4
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [255,255,255,255,255,255,255,255]
 ; AVX1-NEXT:    vpand %xmm6, %xmm4, %xmm4
@@ -524,7 +524,7 @@
 ; AVX2-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX2-NEXT:    vpmovsxbw %xmm2, %ymm2
-; AVX2-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm3
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
 ; AVX2-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm4
 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
Index: test/CodeGen/X86/vector-idiv-sdiv-512.ll
===================================================================
--- test/CodeGen/X86/vector-idiv-sdiv-512.ll
+++ test/CodeGen/X86/vector-idiv-sdiv-512.ll
@@ -1439,7 +1439,7 @@
 ; AVX512F-NEXT:    vpsubb %ymm7, %ymm4, %ymm4
 ; AVX512F-NEXT:    vpaddb %ymm8, %ymm4, %ymm8
 ; AVX512F-NEXT:    vpmovsxbw %xmm8, %ymm9
-; AVX512F-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm4
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
 ; AVX512F-NEXT:    vpmullw %ymm4, %ymm9, %ymm9
 ; AVX512F-NEXT:    vpmovsxwd %ymm9, %zmm9
 ; AVX512F-NEXT:    vpmovdb %zmm9, %xmm9
Index: test/CodeGen/X86/vector-idiv-udiv-128.ll
===================================================================
--- test/CodeGen/X86/vector-idiv-udiv-128.ll
+++ test/CodeGen/X86/vector-idiv-udiv-128.ll
@@ -520,7 +520,7 @@
 ; SSE41-NEXT:    psrlw $2, %xmm2
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm2
 ; SSE41-NEXT:    pmovsxbw %xmm2, %xmm1
-; SSE41-NEXT:    pmovsxbw {{.*}}(%rip), %xmm3
+; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7]
 ; SSE41-NEXT:    pmullw %xmm3, %xmm1
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
 ; SSE41-NEXT:    pand %xmm4, %xmm1
@@ -550,7 +550,7 @@
 ; AVX1-NEXT:    vpsrlw $2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX1-NEXT:    vpmovsxbw %xmm1, %xmm2
-; AVX1-NEXT:    vpmovsxbw {{.*}}(%rip), %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7]
 ; AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
 ; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
@@ -577,8 +577,7 @@
 ; AVX2-NEXT:    vpsrlw $2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2-NEXT:    vpmovsxbw %xmm1, %ymm1
-; AVX2-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm2
-; AVX2-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    vpmullw {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
 ; AVX2-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
Index: test/CodeGen/X86/vector-idiv-udiv-256.ll
===================================================================
--- test/CodeGen/X86/vector-idiv-udiv-256.ll
+++ test/CodeGen/X86/vector-idiv-udiv-256.ll
@@ -470,7 +470,7 @@
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
 ; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vpmovsxbw %xmm3, %xmm6
-; AVX1-NEXT:    vpmovsxbw {{.*}}(%rip), %xmm7
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm7 = [7,7,7,7,7,7,7,7]
 ; AVX1-NEXT:    vpmullw %xmm7, %xmm6, %xmm6
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
 ; AVX1-NEXT:    vpand %xmm5, %xmm6, %xmm6
@@ -530,7 +530,7 @@
 ; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX2-NEXT:    vpmovsxbw %xmm2, %ymm2
-; AVX2-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm3
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
 ; AVX2-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm4
 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
Index: test/CodeGen/X86/vector-idiv-udiv-512.ll
===================================================================
--- test/CodeGen/X86/vector-idiv-udiv-512.ll
+++ test/CodeGen/X86/vector-idiv-udiv-512.ll
@@ -1277,7 +1277,7 @@
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
 ; AVX512F-NEXT:    vpand %ymm6, %ymm3, %ymm7
 ; AVX512F-NEXT:    vpmovsxbw %xmm7, %ymm8
-; AVX512F-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm3
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
 ; AVX512F-NEXT:    vpmullw %ymm3, %ymm8, %ymm8
 ; AVX512F-NEXT:    vpmovsxwd %ymm8, %zmm8
 ; AVX512F-NEXT:    vpmovdb %zmm8, %xmm8
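
Note on the folded constants in the X32 checks of test_sext_4i8_4i64 above: when i64 is not a legal type, the new getConstVector emits each 64-bit constant as a low/high pair of i32 build-vector elements, so sext(i8 0) appears as 0,0 and sext(i8 -1) as 4294967295,4294967295 in the vmovaps immediate. The following standalone C++ sketch illustrates that split with plain integers instead of APInt/SDValue; the helper name splitSExt8To64 is illustrative only and not part of the patch.

#include <cstdint>
#include <iostream>
#include <vector>

// Illustrative only: mimic how a sign-extended 8-bit constant becomes an i64
// value that is then split into two i32 elements (low half first) when 64-bit
// integers are not legal, as getConstVector does with V.trunc(32) and
// V.lshr(32).trunc(32).
static std::vector<uint32_t> splitSExt8To64(int8_t C) {
  uint64_t V = static_cast<uint64_t>(static_cast<int64_t>(C)); // sext i8 -> i64
  return {static_cast<uint32_t>(V),          // low 32 bits
          static_cast<uint32_t>(V >> 32)};   // high 32 bits
}

int main() {
  for (int C : {0, -1}) {
    std::vector<uint32_t> Halves = splitSExt8To64(static_cast<int8_t>(C));
    std::cout << "sext(" << C << ") -> [" << Halves[0] << "," << Halves[1] << "]\n";
  }
  // Prints [0,0] for 0 and [4294967295,4294967295] for -1, matching the
  // vmovaps constant checked in the X32 version of test_sext_4i8_4i64.
}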