Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3805,6 +3805,38 @@ return (computeKnownBits(A).Zero | computeKnownBits(B).Zero).isAllOnesValue(); } +static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT, + ArrayRef Ops, + SelectionDAG &DAG) { + int NumOps = Ops.size(); + assert(NumOps != 0 && "Can't build an empty vector!"); + assert(VT.getVectorNumElements() == NumOps && + "Incorrect element count in BUILD_VECTOR!"); + + // BUILD_VECTOR of UNDEFs is UNDEF. + if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); })) + return DAG.getUNDEF(VT); + + // BUILD_VECTOR of seq extract/insert from the same vector + type is Identity. + SDValue IdentitySrc; + bool IsIdentity = true; + for (int i = 0; i != NumOps; ++i) { + if (Ops[i].getOpcode() != ISD::EXTRACT_VECTOR_ELT || + Ops[i].getOperand(0).getValueType() != VT || + (IdentitySrc && Ops[i].getOperand(0) != IdentitySrc) || + !isa(Ops[i].getOperand(1)) || + cast(Ops[i].getOperand(1))->getAPIntValue() != i) { + IsIdentity = false; + break; + } + IdentitySrc = Ops[i].getOperand(0); + } + if (IsIdentity) + return IdentitySrc; + + return SDValue(); +} + static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT, ArrayRef Ops, SelectionDAG &DAG) { @@ -4059,6 +4091,13 @@ case ISD::MERGE_VALUES: case ISD::CONCAT_VECTORS: return Operand; // Factor, merge or concat of one node? No need. + case ISD::BUILD_VECTOR: { + // Attempt to simplify BUILD_VECTOR. + SDValue Ops[] = {Operand}; + if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) + return V; + break; + } case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node"); case ISD::FP_EXTEND: assert(VT.isFloatingPoint() && @@ -4548,6 +4587,13 @@ if (N2.getOpcode() == ISD::EntryToken) return N1; if (N1 == N2) return N1; break; + case ISD::BUILD_VECTOR: { + // Attempt to simplify BUILD_VECTOR. + SDValue Ops[] = {N1, N2}; + if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) + return V; + break; + } case ISD::CONCAT_VECTORS: { // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. SDValue Ops[] = {N1, N2}; @@ -5019,6 +5065,13 @@ } break; } + case ISD::BUILD_VECTOR: { + // Attempt to simplify BUILD_VECTOR. + SDValue Ops[] = {N1, N2, N3}; + if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) + return V; + break; + } case ISD::CONCAT_VECTORS: { // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. SDValue Ops[] = {N1, N2, N3}; @@ -6788,6 +6841,11 @@ switch (Opcode) { default: break; + case ISD::BUILD_VECTOR: + // Attempt to simplify BUILD_VECTOR. + if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) + return V; + break; case ISD::CONCAT_VECTORS: // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this)) Index: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -1685,14 +1685,15 @@ static SDValue CompactSwizzlableVector( SelectionDAG &DAG, SDValue VectorEntry, DenseMap &RemapSwizzle) { - assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR); assert(RemapSwizzle.empty()); - SDValue NewBldVec[4] = { - VectorEntry.getOperand(0), - VectorEntry.getOperand(1), - VectorEntry.getOperand(2), - VectorEntry.getOperand(3) - }; + + SDLoc DL(VectorEntry); + EVT EltTy = VectorEntry.getValueType().getVectorElementType(); + + SDValue NewBldVec[4]; + for (unsigned i = 0; i < 4; i++) + NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry, + DAG.getIntPtrConstant(i, DL)); for (unsigned i = 0; i < 4; i++) { if (NewBldVec[i].isUndef()) @@ -1727,15 +1728,17 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap &RemapSwizzle) { - assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR); assert(RemapSwizzle.empty()); - SDValue NewBldVec[4] = { - VectorEntry.getOperand(0), - VectorEntry.getOperand(1), - VectorEntry.getOperand(2), - VectorEntry.getOperand(3) - }; - bool isUnmovable[4] = { false, false, false, false }; + + SDLoc DL(VectorEntry); + EVT EltTy = VectorEntry.getValueType().getVectorElementType(); + + SDValue NewBldVec[4]; + bool isUnmovable[4] = {false, false, false, false}; + for (unsigned i = 0; i < 4; i++) + NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry, + DAG.getIntPtrConstant(i, DL)); + for (unsigned i = 0; i < 4; i++) { RemapSwizzle[i] = i; if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { @@ -1766,7 +1769,6 @@ SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4], SelectionDAG &DAG, const SDLoc &DL) const { - assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR); // Old -> New swizzle values DenseMap SwizzleRemap; Index: llvm/trunk/test/CodeGen/AMDGPU/load-local-i16.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/load-local-i16.ll +++ llvm/trunk/test/CodeGen/AMDGPU/load-local-i16.ll @@ -96,8 +96,8 @@ ; GFX9-NOT: m0 ; SICIVI: s_mov_b32 m0 -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:3{{$}} -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:2{{$}} +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} ; EG: LDS_READ_RET Index: llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll +++ llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll @@ -296,59 +296,10 @@ } define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind { -; SSE2-LABEL: _clearupper16xi8a: -; SSE2: # %bb.0: -; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm4 -; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 -; SSE2-NEXT: retq -; -; SSE42-LABEL: _clearupper16xi8a: -; SSE42: # %bb.0: -; SSE42-NEXT: andps {{.*}}(%rip), %xmm0 -; SSE42-NEXT: retq +; SSE-LABEL: _clearupper16xi8a: +; SSE: # %bb.0: +; SSE-NEXT: andps {{.*}}(%rip), %xmm0 +; SSE-NEXT: retq ; ; AVX-LABEL: _clearupper16xi8a: ; AVX: # %bb.0: @@ -422,107 +373,12 @@ } define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind { -; SSE2-LABEL: _clearupper32xi8a: -; SSE2: # %bb.0: -; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm4 -; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE2-NEXT: pand %xmm2, %xmm0 -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm5 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3] -; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm4 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm4 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movd %eax, %xmm6 -; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] -; SSE2-NEXT: pand %xmm2, %xmm1 -; SSE2-NEXT: retq -; -; SSE42-LABEL: _clearupper32xi8a: -; SSE42: # %bb.0: -; SSE42-NEXT: movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE42-NEXT: andps %xmm2, %xmm0 -; SSE42-NEXT: andps %xmm2, %xmm1 -; SSE42-NEXT: retq +; SSE-LABEL: _clearupper32xi8a: +; SSE: # %bb.0: +; SSE-NEXT: movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; SSE-NEXT: andps %xmm2, %xmm0 +; SSE-NEXT: andps %xmm2, %xmm1 +; SSE-NEXT: retq ; ; AVX-LABEL: _clearupper32xi8a: ; AVX: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/lower-vec-shift.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/lower-vec-shift.ll +++ llvm/trunk/test/CodeGen/X86/lower-vec-shift.ll @@ -234,8 +234,6 @@ ; SSE-LABEL: test10: ; SSE: # %bb.0: ; SSE-NEXT: movdqa (%rdi), %xmm0 -; SSE-NEXT: movdqa 16(%rdi), %xmm1 -; SSE-NEXT: psrad %xmm0, %xmm1 ; SSE-NEXT: psrad $1, %xmm0 ; SSE-NEXT: retq ;