Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16933,12 +16933,15 @@ SDValue N0 = SVN->getOperand(0); SDValue N1 = SVN->getOperand(1); - if (!N0->hasOneUse() || !N1->hasOneUse()) + if (!N0->hasOneUse()) return SDValue(); // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as // discussed above. if (!N1.isUndef()) { + if (!N1->hasOneUse()) + return SDValue(); + bool N0AnyConst = isAnyConstantBuildVector(N0.getNode()); bool N1AnyConst = isAnyConstantBuildVector(N1.getNode()); if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode())) @@ -17371,7 +17374,7 @@ // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. - if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) + if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI)) return Res; Index: llvm/trunk/test/CodeGen/Mips/cconv/vector.ll =================================================================== --- llvm/trunk/test/CodeGen/Mips/cconv/vector.ll +++ llvm/trunk/test/CodeGen/Mips/cconv/vector.ll @@ -61,19 +61,15 @@ ; MIPS32R5EB-NEXT: sw $5, 36($sp) ; MIPS32R5EB-NEXT: sw $4, 40($sp) ; MIPS32R5EB-NEXT: lbu $1, 37($sp) -; MIPS32R5EB-NEXT: sw $1, 20($sp) +; MIPS32R5EB-NEXT: sw $1, 28($sp) ; MIPS32R5EB-NEXT: lbu $1, 36($sp) -; MIPS32R5EB-NEXT: sw $1, 16($sp) +; MIPS32R5EB-NEXT: sw $1, 20($sp) +; MIPS32R5EB-NEXT: lbu $1, 41($sp) +; MIPS32R5EB-NEXT: sw $1, 12($sp) ; MIPS32R5EB-NEXT: lbu $1, 40($sp) -; MIPS32R5EB-NEXT: lbu $2, 41($sp) -; MIPS32R5EB-NEXT: sw $2, 4($sp) -; MIPS32R5EB-NEXT: sw $1, 0($sp) -; MIPS32R5EB-NEXT: ld.w $w0, 16($sp) -; MIPS32R5EB-NEXT: ilvr.w $w0, $w0, $w0 -; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177 -; MIPS32R5EB-NEXT: ld.w $w1, 0($sp) -; MIPS32R5EB-NEXT: ilvr.w $w1, $w1, $w1 -; MIPS32R5EB-NEXT: shf.w $w1, $w1, 177 +; MIPS32R5EB-NEXT: sw $1, 4($sp) +; MIPS32R5EB-NEXT: ld.d $w0, 16($sp) +; MIPS32R5EB-NEXT: ld.d $w1, 0($sp) ; MIPS32R5EB-NEXT: addv.d $w0, $w1, $w0 ; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177 ; MIPS32R5EB-NEXT: copy_s.w $1, $w0[1] @@ -166,17 +162,15 @@ ; MIPS32R5EL-NEXT: sw $5, 36($sp) ; MIPS32R5EL-NEXT: sw $4, 40($sp) ; MIPS32R5EL-NEXT: lbu $1, 37($sp) -; MIPS32R5EL-NEXT: sw $1, 20($sp) +; MIPS32R5EL-NEXT: sw $1, 24($sp) ; MIPS32R5EL-NEXT: lbu $1, 36($sp) ; MIPS32R5EL-NEXT: sw $1, 16($sp) ; MIPS32R5EL-NEXT: lbu $1, 41($sp) -; MIPS32R5EL-NEXT: sw $1, 4($sp) +; MIPS32R5EL-NEXT: sw $1, 8($sp) ; MIPS32R5EL-NEXT: lbu $1, 40($sp) ; MIPS32R5EL-NEXT: sw $1, 0($sp) -; MIPS32R5EL-NEXT: ld.w $w0, 16($sp) -; MIPS32R5EL-NEXT: ilvr.w $w0, $w0, $w0 -; MIPS32R5EL-NEXT: ld.w $w1, 0($sp) -; MIPS32R5EL-NEXT: ilvr.w $w1, $w1, $w1 +; MIPS32R5EL-NEXT: ld.d $w0, 16($sp) +; MIPS32R5EL-NEXT: ld.d $w1, 0($sp) ; MIPS32R5EL-NEXT: addv.d $w0, $w1, $w0 ; MIPS32R5EL-NEXT: copy_s.w $1, $w0[0] ; MIPS32R5EL-NEXT: copy_s.w $2, $w0[2] @@ -327,61 +321,47 @@ ; MIPS32R5EB-NEXT: sw $5, 132($sp) ; MIPS32R5EB-NEXT: sw $4, 136($sp) ; MIPS32R5EB-NEXT: lbu $1, 133($sp) -; MIPS32R5EB-NEXT: sw $1, 68($sp) +; MIPS32R5EB-NEXT: sw $1, 76($sp) ; MIPS32R5EB-NEXT: lbu $1, 132($sp) -; MIPS32R5EB-NEXT: sw $1, 64($sp) +; MIPS32R5EB-NEXT: sw $1, 68($sp) +; MIPS32R5EB-NEXT: lbu $1, 137($sp) +; MIPS32R5EB-NEXT: sw $1, 60($sp) ; MIPS32R5EB-NEXT: lbu $1, 136($sp) -; MIPS32R5EB-NEXT: lbu $2, 137($sp) -; MIPS32R5EB-NEXT: sw $2, 52($sp) -; MIPS32R5EB-NEXT: sw $1, 48($sp) -; MIPS32R5EB-NEXT: ld.w $w0, 64($sp) -; MIPS32R5EB-NEXT: ilvr.w $w0, $w0, $w0 -; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177 -; MIPS32R5EB-NEXT: ld.w $w1, 48($sp) -; MIPS32R5EB-NEXT: ilvr.w $w1, $w1, $w1 -; MIPS32R5EB-NEXT: shf.w $w1, $w1, 177 +; MIPS32R5EB-NEXT: sw $1, 52($sp) +; MIPS32R5EB-NEXT: ld.d $w0, 64($sp) +; MIPS32R5EB-NEXT: ld.d $w1, 48($sp) ; MIPS32R5EB-NEXT: addv.d $w0, $w1, $w0 ; MIPS32R5EB-NEXT: sw $6, 128($sp) ; MIPS32R5EB-NEXT: lbu $1, 129($sp) -; MIPS32R5EB-NEXT: sw $1, 84($sp) +; MIPS32R5EB-NEXT: sw $1, 92($sp) ; MIPS32R5EB-NEXT: lbu $1, 128($sp) -; MIPS32R5EB-NEXT: sw $1, 80($sp) -; MIPS32R5EB-NEXT: ld.w $w1, 80($sp) -; MIPS32R5EB-NEXT: ilvr.w $w1, $w1, $w1 -; MIPS32R5EB-NEXT: shf.w $w1, $w1, 177 +; MIPS32R5EB-NEXT: sw $1, 84($sp) +; MIPS32R5EB-NEXT: ld.d $w1, 80($sp) ; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1 ; MIPS32R5EB-NEXT: sw $7, 124($sp) ; MIPS32R5EB-NEXT: lbu $1, 125($sp) -; MIPS32R5EB-NEXT: sw $1, 100($sp) +; MIPS32R5EB-NEXT: sw $1, 108($sp) ; MIPS32R5EB-NEXT: lbu $1, 124($sp) -; MIPS32R5EB-NEXT: sw $1, 96($sp) -; MIPS32R5EB-NEXT: ld.w $w1, 96($sp) -; MIPS32R5EB-NEXT: ilvr.w $w1, $w1, $w1 -; MIPS32R5EB-NEXT: shf.w $w1, $w1, 177 +; MIPS32R5EB-NEXT: sw $1, 100($sp) +; MIPS32R5EB-NEXT: ld.d $w1, 96($sp) ; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1 ; MIPS32R5EB-NEXT: lbu $1, 161($fp) -; MIPS32R5EB-NEXT: sw $1, 4($sp) +; MIPS32R5EB-NEXT: sw $1, 12($sp) ; MIPS32R5EB-NEXT: lbu $1, 160($fp) -; MIPS32R5EB-NEXT: sw $1, 0($sp) -; MIPS32R5EB-NEXT: ld.w $w1, 0($sp) -; MIPS32R5EB-NEXT: ilvr.w $w1, $w1, $w1 -; MIPS32R5EB-NEXT: shf.w $w1, $w1, 177 +; MIPS32R5EB-NEXT: sw $1, 4($sp) +; MIPS32R5EB-NEXT: ld.d $w1, 0($sp) ; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1 ; MIPS32R5EB-NEXT: lbu $1, 165($fp) -; MIPS32R5EB-NEXT: sw $1, 20($sp) +; MIPS32R5EB-NEXT: sw $1, 28($sp) ; MIPS32R5EB-NEXT: lbu $1, 164($fp) -; MIPS32R5EB-NEXT: sw $1, 16($sp) -; MIPS32R5EB-NEXT: ld.w $w1, 16($sp) -; MIPS32R5EB-NEXT: ilvr.w $w1, $w1, $w1 -; MIPS32R5EB-NEXT: shf.w $w1, $w1, 177 +; MIPS32R5EB-NEXT: sw $1, 20($sp) +; MIPS32R5EB-NEXT: ld.d $w1, 16($sp) ; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1 ; MIPS32R5EB-NEXT: lbu $1, 169($fp) -; MIPS32R5EB-NEXT: sw $1, 36($sp) +; MIPS32R5EB-NEXT: sw $1, 44($sp) ; MIPS32R5EB-NEXT: lbu $1, 168($fp) -; MIPS32R5EB-NEXT: sw $1, 32($sp) -; MIPS32R5EB-NEXT: ld.w $w1, 32($sp) -; MIPS32R5EB-NEXT: ilvr.w $w1, $w1, $w1 -; MIPS32R5EB-NEXT: shf.w $w1, $w1, 177 +; MIPS32R5EB-NEXT: sw $1, 36($sp) +; MIPS32R5EB-NEXT: ld.d $w1, 32($sp) ; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1 ; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177 ; MIPS32R5EB-NEXT: copy_s.w $1, $w0[1] @@ -579,54 +559,47 @@ ; MIPS32R5EL-NEXT: sw $5, 132($sp) ; MIPS32R5EL-NEXT: sw $4, 136($sp) ; MIPS32R5EL-NEXT: lbu $1, 133($sp) -; MIPS32R5EL-NEXT: sw $1, 68($sp) +; MIPS32R5EL-NEXT: sw $1, 72($sp) ; MIPS32R5EL-NEXT: lbu $1, 132($sp) ; MIPS32R5EL-NEXT: sw $1, 64($sp) ; MIPS32R5EL-NEXT: lbu $1, 137($sp) -; MIPS32R5EL-NEXT: sw $1, 52($sp) +; MIPS32R5EL-NEXT: sw $1, 56($sp) ; MIPS32R5EL-NEXT: lbu $1, 136($sp) ; MIPS32R5EL-NEXT: sw $1, 48($sp) -; MIPS32R5EL-NEXT: ld.w $w0, 64($sp) -; MIPS32R5EL-NEXT: ilvr.w $w0, $w0, $w0 -; MIPS32R5EL-NEXT: ld.w $w1, 48($sp) -; MIPS32R5EL-NEXT: ilvr.w $w1, $w1, $w1 +; MIPS32R5EL-NEXT: ld.d $w0, 64($sp) +; MIPS32R5EL-NEXT: ld.d $w1, 48($sp) ; MIPS32R5EL-NEXT: addv.d $w0, $w1, $w0 ; MIPS32R5EL-NEXT: sw $6, 128($sp) ; MIPS32R5EL-NEXT: lbu $1, 129($sp) -; MIPS32R5EL-NEXT: sw $1, 84($sp) +; MIPS32R5EL-NEXT: sw $1, 88($sp) ; MIPS32R5EL-NEXT: lbu $1, 128($sp) ; MIPS32R5EL-NEXT: sw $1, 80($sp) -; MIPS32R5EL-NEXT: ld.w $w1, 80($sp) -; MIPS32R5EL-NEXT: ilvr.w $w1, $w1, $w1 +; MIPS32R5EL-NEXT: ld.d $w1, 80($sp) ; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1 ; MIPS32R5EL-NEXT: sw $7, 124($sp) ; MIPS32R5EL-NEXT: lbu $1, 125($sp) -; MIPS32R5EL-NEXT: sw $1, 100($sp) +; MIPS32R5EL-NEXT: sw $1, 104($sp) ; MIPS32R5EL-NEXT: lbu $1, 124($sp) ; MIPS32R5EL-NEXT: sw $1, 96($sp) -; MIPS32R5EL-NEXT: ld.w $w1, 96($sp) -; MIPS32R5EL-NEXT: ilvr.w $w1, $w1, $w1 +; MIPS32R5EL-NEXT: ld.d $w1, 96($sp) ; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1 ; MIPS32R5EL-NEXT: lbu $1, 161($fp) -; MIPS32R5EL-NEXT: sw $1, 4($sp) +; MIPS32R5EL-NEXT: sw $1, 8($sp) ; MIPS32R5EL-NEXT: lbu $1, 160($fp) ; MIPS32R5EL-NEXT: sw $1, 0($sp) -; MIPS32R5EL-NEXT: ld.w $w1, 0($sp) -; MIPS32R5EL-NEXT: ilvr.w $w1, $w1, $w1 +; MIPS32R5EL-NEXT: ld.d $w1, 0($sp) ; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1 ; MIPS32R5EL-NEXT: lbu $1, 165($fp) -; MIPS32R5EL-NEXT: sw $1, 20($sp) +; MIPS32R5EL-NEXT: sw $1, 24($sp) ; MIPS32R5EL-NEXT: lbu $1, 164($fp) ; MIPS32R5EL-NEXT: sw $1, 16($sp) -; MIPS32R5EL-NEXT: ld.w $w1, 16($sp) -; MIPS32R5EL-NEXT: ilvr.w $w1, $w1, $w1 +; MIPS32R5EL-NEXT: ld.d $w1, 16($sp) ; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1 ; MIPS32R5EL-NEXT: lbu $1, 169($fp) -; MIPS32R5EL-NEXT: sw $1, 36($sp) +; MIPS32R5EL-NEXT: sw $1, 40($sp) ; MIPS32R5EL-NEXT: lbu $1, 168($fp) ; MIPS32R5EL-NEXT: sw $1, 32($sp) -; MIPS32R5EL-NEXT: ld.w $w1, 32($sp) -; MIPS32R5EL-NEXT: ilvr.w $w1, $w1, $w1 +; MIPS32R5EL-NEXT: ld.d $w1, 32($sp) ; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1 ; MIPS32R5EL-NEXT: copy_s.w $1, $w0[0] ; MIPS32R5EL-NEXT: copy_s.w $2, $w0[2] Index: llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll +++ llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll @@ -28,10 +28,9 @@ ; X32-NEXT: movswl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movsbl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: vmovd %ecx, %xmm0 -; X32-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 -; X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 -; X32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; X32-NEXT: vcvtdq2ps %xmm0, %xmm0 ; X32-NEXT: retl ; Index: llvm/trunk/test/CodeGen/X86/vec_cast.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vec_cast.ll +++ llvm/trunk/test/CodeGen/X86/vec_cast.ll @@ -28,10 +28,10 @@ define <3 x i32> @b(<3 x i16> %a) nounwind { ; CHECK-LIN-LABEL: b: ; CHECK-LIN: # %bb.0: -; CHECK-LIN-NEXT: movd %edi, %xmm0 -; CHECK-LIN-NEXT: pinsrw $1, %esi, %xmm0 -; CHECK-LIN-NEXT: pinsrw $2, %edx, %xmm0 -; CHECK-LIN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; CHECK-LIN-NEXT: pxor %xmm0, %xmm0 +; CHECK-LIN-NEXT: pinsrw $1, %edi, %xmm0 +; CHECK-LIN-NEXT: pinsrw $3, %esi, %xmm0 +; CHECK-LIN-NEXT: pinsrw $5, %edx, %xmm0 ; CHECK-LIN-NEXT: psrad $16, %xmm0 ; CHECK-LIN-NEXT: retq ; @@ -40,10 +40,10 @@ ; CHECK-WIN-NEXT: # kill: def $r8w killed $r8w def $r8d ; CHECK-WIN-NEXT: # kill: def $dx killed $dx def $edx ; CHECK-WIN-NEXT: # kill: def $cx killed $cx def $ecx -; CHECK-WIN-NEXT: movd %ecx, %xmm0 -; CHECK-WIN-NEXT: pinsrw $1, %edx, %xmm0 -; CHECK-WIN-NEXT: pinsrw $2, %r8d, %xmm0 -; CHECK-WIN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; CHECK-WIN-NEXT: pxor %xmm0, %xmm0 +; CHECK-WIN-NEXT: pinsrw $1, %ecx, %xmm0 +; CHECK-WIN-NEXT: pinsrw $3, %edx, %xmm0 +; CHECK-WIN-NEXT: pinsrw $5, %r8d, %xmm0 ; CHECK-WIN-NEXT: psrad $16, %xmm0 ; CHECK-WIN-NEXT: retq %c = sext <3 x i16> %a to <3 x i32> @@ -88,11 +88,10 @@ define <3 x i32> @e(<3 x i16> %a) nounwind { ; CHECK-LIN-LABEL: e: ; CHECK-LIN: # %bb.0: -; CHECK-LIN-NEXT: movd %edi, %xmm0 -; CHECK-LIN-NEXT: pinsrw $1, %esi, %xmm0 -; CHECK-LIN-NEXT: pinsrw $2, %edx, %xmm0 -; CHECK-LIN-NEXT: pxor %xmm1, %xmm1 -; CHECK-LIN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK-LIN-NEXT: pxor %xmm0, %xmm0 +; CHECK-LIN-NEXT: pinsrw $0, %edi, %xmm0 +; CHECK-LIN-NEXT: pinsrw $2, %esi, %xmm0 +; CHECK-LIN-NEXT: pinsrw $4, %edx, %xmm0 ; CHECK-LIN-NEXT: retq ; ; CHECK-WIN-LABEL: e: @@ -100,11 +99,10 @@ ; CHECK-WIN-NEXT: # kill: def $r8w killed $r8w def $r8d ; CHECK-WIN-NEXT: # kill: def $dx killed $dx def $edx ; CHECK-WIN-NEXT: # kill: def $cx killed $cx def $ecx -; CHECK-WIN-NEXT: movd %ecx, %xmm0 -; CHECK-WIN-NEXT: pinsrw $1, %edx, %xmm0 -; CHECK-WIN-NEXT: pinsrw $2, %r8d, %xmm0 -; CHECK-WIN-NEXT: pxor %xmm1, %xmm1 -; CHECK-WIN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK-WIN-NEXT: pxor %xmm0, %xmm0 +; CHECK-WIN-NEXT: pinsrw $0, %ecx, %xmm0 +; CHECK-WIN-NEXT: pinsrw $2, %edx, %xmm0 +; CHECK-WIN-NEXT: pinsrw $4, %r8d, %xmm0 ; CHECK-WIN-NEXT: retq %c = zext <3 x i16> %a to <3 x i32> ret <3 x i32> %c Index: llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll +++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll @@ -1259,29 +1259,29 @@ ; ; VEX-LABEL: sitofp_2i64_to_4f32_zero: ; VEX: # %bb.0: -; VEX-NEXT: vpextrq $1, %xmm0, %rax -; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; VEX-NEXT: vmovq %xmm0, %rax +; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 +; VEX-NEXT: vpextrq $1, %xmm0, %rax ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 -; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero ; VEX-NEXT: retq ; ; AVX512F-LABEL: sitofp_2i64_to_4f32_zero: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpextrq $1, %xmm0, %rax -; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; AVX512F-NEXT: vmovq %xmm0, %rax +; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 +; AVX512F-NEXT: vpextrq $1, %xmm0, %rax ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: sitofp_2i64_to_4f32_zero: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax -; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax +; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 +; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; @@ -2058,7 +2058,7 @@ ; ; VEX-LABEL: uitofp_2i64_to_2f32: ; VEX: # %bb.0: -; VEX-NEXT: vpextrq $1, %xmm0, %rax +; VEX-NEXT: vmovq %xmm0, %rax ; VEX-NEXT: testq %rax, %rax ; VEX-NEXT: js .LBB40_1 ; VEX-NEXT: # %bb.2: @@ -2072,12 +2072,12 @@ ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; VEX-NEXT: vaddss %xmm1, %xmm1, %xmm1 ; VEX-NEXT: .LBB40_3: -; VEX-NEXT: vmovq %xmm0, %rax +; VEX-NEXT: vpextrq $1, %xmm0, %rax ; VEX-NEXT: testq %rax, %rax ; VEX-NEXT: js .LBB40_4 ; VEX-NEXT: # %bb.5: ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 -; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero ; VEX-NEXT: retq ; VEX-NEXT: .LBB40_4: ; VEX-NEXT: movq %rax, %rcx @@ -2086,25 +2086,25 @@ ; VEX-NEXT: orq %rcx, %rax ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 ; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0 -; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero ; VEX-NEXT: retq ; ; AVX512F-LABEL: uitofp_2i64_to_2f32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpextrq $1, %xmm0, %rax -; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 ; AVX512F-NEXT: vmovq %xmm0, %rax +; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 +; AVX512F-NEXT: vpextrq $1, %xmm0, %rax ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: uitofp_2i64_to_2f32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax -; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax +; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 +; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; Index: llvm/trunk/test/CodeGen/X86/widen_conv-3.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/widen_conv-3.ll +++ llvm/trunk/test/CodeGen/X86/widen_conv-3.ll @@ -47,15 +47,16 @@ ; X86-SSE2-NEXT: movd %edx, %xmm0 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] -; X86-SSE2-NEXT: movzbl 2(%ecx), %ecx ; X86-SSE2-NEXT: movdqa %xmm0, (%esp) -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE2-NEXT: movl (%esp), %edx +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE2-NEXT: shll $8, %esi ; X86-SSE2-NEXT: shll $8, %edx -; X86-SSE2-NEXT: movzbl (%esp), %esi -; X86-SSE2-NEXT: orl %edx, %esi -; X86-SSE2-NEXT: movd %esi, %xmm0 -; X86-SSE2-NEXT: pinsrw $1, %ecx, %xmm0 -; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; X86-SSE2-NEXT: movd %edx, %xmm0 +; X86-SSE2-NEXT: pinsrw $1, %esi, %xmm0 +; X86-SSE2-NEXT: movzbl 2(%ecx), %ecx +; X86-SSE2-NEXT: shll $8, %ecx +; X86-SSE2-NEXT: pinsrw $2, %ecx, %xmm0 ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; X86-SSE2-NEXT: psrad $24, %xmm0 ; X86-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 @@ -95,15 +96,16 @@ ; X64-SSE2-NEXT: movq %rax, %xmm0 ; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] -; X64-SSE2-NEXT: movzbl 2(%rsi), %eax ; X64-SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) +; X64-SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %eax ; X64-SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %ecx ; X64-SSE2-NEXT: shll $8, %ecx -; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; X64-SSE2-NEXT: orl %ecx, %edx -; X64-SSE2-NEXT: movd %edx, %xmm0 -; X64-SSE2-NEXT: pinsrw $1, %eax, %xmm0 -; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; X64-SSE2-NEXT: shll $8, %eax +; X64-SSE2-NEXT: movd %eax, %xmm0 +; X64-SSE2-NEXT: pinsrw $1, %ecx, %xmm0 +; X64-SSE2-NEXT: movzbl 2(%rsi), %eax +; X64-SSE2-NEXT: shll $8, %eax +; X64-SSE2-NEXT: pinsrw $2, %eax, %xmm0 ; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; X64-SSE2-NEXT: psrad $24, %xmm0 ; X64-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 Index: llvm/trunk/test/CodeGen/X86/widen_conv-4.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/widen_conv-4.ll +++ llvm/trunk/test/CodeGen/X86/widen_conv-4.ll @@ -77,7 +77,6 @@ ; X86-SSE2: # %bb.0: # %entry ; X86-SSE2-NEXT: pushl %ebp ; X86-SSE2-NEXT: movl %esp, %ebp -; X86-SSE2-NEXT: pushl %esi ; X86-SSE2-NEXT: andl $-16, %esp ; X86-SSE2-NEXT: subl $32, %esp ; X86-SSE2-NEXT: movl 8(%ebp), %eax @@ -88,15 +87,11 @@ ; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; X86-SSE2-NEXT: movzbl 2(%ecx), %ecx ; X86-SSE2-NEXT: movdqa %xmm0, (%esp) -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE2-NEXT: shll $8, %edx -; X86-SSE2-NEXT: movzbl (%esp), %esi -; X86-SSE2-NEXT: orl %edx, %esi -; X86-SSE2-NEXT: movd %esi, %xmm0 -; X86-SSE2-NEXT: pinsrw $1, %ecx, %xmm0 -; X86-SSE2-NEXT: pxor %xmm1, %xmm1 -; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; X86-SSE2-NEXT: movzbl (%esp), %edx +; X86-SSE2-NEXT: movd %edx, %xmm0 +; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-SSE2-NEXT: pinsrw $2, %edx, %xmm0 +; X86-SSE2-NEXT: pinsrw $4, %ecx, %xmm0 ; X86-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 ; X86-SSE2-NEXT: movss %xmm0, (%eax) ; X86-SSE2-NEXT: movaps %xmm0, %xmm1 @@ -104,8 +99,7 @@ ; X86-SSE2-NEXT: movss %xmm1, 8(%eax) ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] ; X86-SSE2-NEXT: movss %xmm0, 4(%eax) -; X86-SSE2-NEXT: leal -4(%ebp), %esp -; X86-SSE2-NEXT: popl %esi +; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: retl ; @@ -135,15 +129,11 @@ ; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; X64-SSE2-NEXT: movzbl 2(%rsi), %eax ; X64-SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; X64-SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %ecx -; X64-SSE2-NEXT: shll $8, %ecx -; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; X64-SSE2-NEXT: orl %ecx, %edx -; X64-SSE2-NEXT: movd %edx, %xmm0 -; X64-SSE2-NEXT: pinsrw $1, %eax, %xmm0 -; X64-SSE2-NEXT: pxor %xmm1, %xmm1 -; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-SSE2-NEXT: movd %ecx, %xmm0 +; X64-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-SSE2-NEXT: pinsrw $2, %ecx, %xmm0 +; X64-SSE2-NEXT: pinsrw $4, %eax, %xmm0 ; X64-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 ; X64-SSE2-NEXT: movlps %xmm0, (%rdi) ; X64-SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]