Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14977,6 +14977,14 @@
     }
   }
 
+  // Simplify source vector based on extraction index.
+  if (ConstEltNo) {
+    APInt DemandedElt = APInt::getOneBitSet(VT.getVectorNumElements(),
+                                            ConstEltNo->getZExtValue());
+    if (SimplifyDemandedVectorElts(InVec, DemandedElt))
+      return SDValue(N, 0);
+  }
+
   bool BCNumEltsChanged = false;
   EVT ExtVT = VT.getVectorElementType();
   EVT LVT = ExtVT;
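
[Reviewer note, not part of the patch: the new combine in visitEXTRACT_VECTOR_ELT
builds a one-hot demanded-elements mask for the constant extraction index and
hands it to SimplifyDemandedVectorElts, which may rewrite InVec in place so that
work feeding the unused lanes can be dropped; returning SDValue(N, 0) tells the
combiner that the node was updated. A minimal standalone sketch of the mask
construction, with an illustrative helper name that is not in the patch:

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    // Demand only lane Idx of a NumElts-wide vector: exactly one bit is set.
    // For example, NumElts = 8 and Idx = 0 give the mask 0b00000001.
    APInt demandedEltsForLane(unsigned NumElts, unsigned Idx) {
      return APInt::getOneBitSet(NumElts, Idx);
    }

All of the test churn below falls out of this single change.]
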
Index: test/CodeGen/AArch64/aarch64-be-bv.ll
===================================================================
--- test/CodeGen/AArch64/aarch64-be-bv.ll
+++ test/CodeGen/AArch64/aarch64-be-bv.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL: movi_modimm_t1:
 define i16 @movi_modimm_t1() nounwind {
   ; CHECK:      ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
-  ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #1
+  ; CHECK-NEXT: movi v[[REG2:[0-9]+]].8h, #1
   ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
   %in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -17,7 +17,7 @@
 ; CHECK-LABEL: movi_modimm_t2:
 define i16 @movi_modimm_t2() nounwind {
   ; CHECK:      ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
-  ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #1, lsl #8
+  ; CHECK-NEXT: movi v[[REG2:[0-9]+]].8h, #1, lsl #8
   ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
   %in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -29,8 +29,6 @@
 ; CHECK-LABEL: movi_modimm_t3:
 define i16 @movi_modimm_t3() nounwind {
   ; CHECK:      ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
-  ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #1, lsl #16
-  ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
   %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in,
@@ -41,8 +39,6 @@
 ; CHECK-LABEL: movi_modimm_t4:
 define i16 @movi_modimm_t4() nounwind {
   ; CHECK:      ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
-  ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #1, lsl #24
-  ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
   %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in,
@@ -77,7 +73,7 @@
 ; CHECK-LABEL: movi_modimm_t7:
 define i16 @movi_modimm_t7() nounwind {
   ; CHECK:      ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
-  ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #1, msl #8
+  ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].8h, #254, lsl #8
   ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
   %in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -89,7 +85,7 @@
 ; CHECK-LABEL: movi_modimm_t8:
 define i16 @movi_modimm_t8() nounwind {
   ; CHECK:      ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
-  ; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #1, msl #16
+  ; CHECK-NEXT: movi v[[REG2:[0-9]+]].2d, #0xffffffffffffffff
   ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
   %in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -113,7 +109,7 @@
 ; CHECK-LABEL: movi_modimm_t10:
 define i16 @movi_modimm_t10() nounwind {
   ; CHECK:      ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
-  ; CHECK-NEXT: movi v[[REG2:[0-9]+]].2d, #0x00ffff0000ffff
+  ; CHECK-NEXT: movi v[[REG2:[0-9]+]].2d, #0xffffffffffffffff
   ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
   %in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -125,8 +121,6 @@
 ; CHECK-LABEL: fmov_modimm_t11:
 define i16 @fmov_modimm_t11() nounwind {
   ; CHECK:      ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
-  ; CHECK-NEXT: fmov v[[REG2:[0-9]+]].4s, #3.00000000
-  ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
   %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in,
@@ -137,8 +131,6 @@
 ; CHECK-LABEL: fmov_modimm_t12:
 define i16 @fmov_modimm_t12() nounwind {
   ; CHECK:      ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
-  ; CHECK-NEXT: fmov v[[REG2:[0-9]+]].2d, #0.17968750
-  ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
   %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in,
@@ -149,7 +141,7 @@
 ; CHECK-LABEL: mvni_modimm_t1:
 define i16 @mvni_modimm_t1() nounwind {
   ; CHECK:      ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
-  ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #1
+  ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].8h, #1
   ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
   %in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -161,7 +153,7 @@
 ; CHECK-LABEL: mvni_modimm_t2:
 define i16 @mvni_modimm_t2() nounwind {
   ; CHECK:      ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
-  ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #1, lsl #8
+  ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].8h, #1, lsl #8
   ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
   %in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -173,7 +165,7 @@
 ; CHECK-LABEL: mvni_modimm_t3:
 define i16 @mvni_modimm_t3() nounwind {
   ; CHECK:      ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
-  ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #1, lsl #16
+  ; CHECK-NEXT: movi v[[REG2:[0-9]+]].2d, #0xffffffffffffffff
   ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
   %in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -185,7 +177,7 @@
 ; CHECK-LABEL: mvni_modimm_t4:
 define i16 @mvni_modimm_t4() nounwind {
   ; CHECK:      ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
-  ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #1, lsl #24
+  ; CHECK-NEXT: movi v[[REG2:[0-9]+]].2d, #0xffffffffffffffff
   ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
   %in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -221,7 +213,7 @@
 ; CHECK-LABEL: mvni_modimm_t7:
 define i16 @mvni_modimm_t7() nounwind {
   ; CHECK:      ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
-  ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #1, msl #8
+  ; CHECK-NEXT: movi v[[REG2:[0-9]+]].8h, #254, lsl #8
   ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
   %in = load <8 x i16>, <8 x i16>* @vec_v8i16
@@ -233,8 +225,6 @@
 ; CHECK-LABEL: mvni_modimm_t8:
 define i16 @mvni_modimm_t8() nounwind {
   ; CHECK:      ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
-  ; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #1, msl #16
-  ; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
   %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in,
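
[Reviewer note, not part of the patch: every test in this file loads an
<8 x i16> vector, adds a constant, and extracts only lane 0 (the umov from
v[[REG1]].h[0]). With the new combine, constant lanes that no longer reach
lane 0 are dead, so the movi/add pair disappears entirely (t3, t4, t11, t12)
or the constant is rematerialized in a cheaper modified-immediate form. A
hand-written sketch of the test shape; the constant operand is left elided
here because the literal vectors were lost from this listing:

    %in = load <8 x i16>, <8 x i16>* @vec_v8i16
    %rv = add <8 x i16> %in, ...        ; vector constant elided
    %el = extractelement <8 x i16> %rv, i32 0
    ret i16 %el
]
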
Index: test/CodeGen/AMDGPU/indirect-addressing-si.ll
===================================================================
--- test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -480,38 +480,28 @@
 
 ; GCN-LABEL: {{^}}multi_same_block:
-; GCN-DAG: v_mov_b32_e32 v[[VEC0_ELT0:[0-9]+]], 0x41880000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
-; GCN-DAG: v_mov_b32_e32 v[[VEC0_ELT2:[0-9]+]], 0x41980000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a00000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a80000
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b00000
-; GCN-DAG: s_load_dword [[ARG:s[0-9]+]]
-; IDXMODE-DAG: s_add_i32 [[ARG_ADD:s[0-9]+]], [[ARG]], -16
-
-; MOVREL-DAG: s_add_i32 m0, [[ARG]], -16
-; MOVREL: v_movreld_b32_e32 v[[VEC0_ELT0]], 4.0
-; GCN-NOT: m0
-
-; IDXMODE: s_set_gpr_idx_on [[ARG_ADD]], dst
-; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT0]], 4.0
-; IDXMODE: s_set_gpr_idx_off
-
-; GCN: v_mov_b32_e32 v[[VEC0_ELT2]], 0x4188cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x4190cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x4198cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a0cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a8cccd
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
+; GCN: s_load_dword [[ARG:s[0-9]+]]
 
-; MOVREL: v_movreld_b32_e32 v[[VEC0_ELT2]], -4.0
-
-; IDXMODE: s_set_gpr_idx_on [[ARG_ADD]], dst
-; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT2]], -4.0
+; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
+; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
+; MOVREL: s_waitcnt
+; MOVREL: s_add_i32 m0, [[ARG]], -16
+; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, 4.0
+; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, -4.0
+; MOVREL: s_mov_b32 m0, -1
+
+
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
+; IDXMODE: s_waitcnt
+; IDXMODE: s_add_i32 [[ARG]], [[ARG]], -16
+; IDXMODE: s_set_gpr_idx_on [[ARG]], dst
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 4.0
+; IDXMODE: s_set_gpr_idx_off
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
+; IDXMODE: s_set_gpr_idx_on [[ARG]], dst
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, -4.0
 ; IDXMODE: s_set_gpr_idx_off
 
-; PREGFX9: s_mov_b32 m0, -1
-; GFX9-NOT: s_mov_b32 m0
 
 ; GCN: ds_write_b32
 ; GCN: ds_write_b32
 ; GCN: s_endpgm
Index: test/CodeGen/ARM/func-argpassing-endian.ll
===================================================================
--- test/CodeGen/ARM/func-argpassing-endian.ll
+++ test/CodeGen/ARM/func-argpassing-endian.ll
@@ -1,32 +1,57 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm-eabi -mattr=v7,neon | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=armeb-eabi -mattr=v7,neon | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm-eabi -mattr=v7,neon | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+; RUN: llc -verify-machineinstrs < %s -mtriple=armeb-eabi -mattr=v7,neon | FileCheck %s --check-prefixes=CHECK,CHECK-BE
 
 @var32 = global i32 0
 @vardouble = global double 0.0
 
 define void @arg_longint( i64 %val ) {
-; CHECK-LABEL: arg_longint:
-; CHECK-LE: str r0, [r1]
-; CHECK-BE: str r1, [r0]
-   %tmp = trunc i64 %val to i32
+; CHECK-LE-LABEL: arg_longint:
+; CHECK-LE: @ %bb.0:
+; CHECK-LE-NEXT: movw r1, :lower16:var32
+; CHECK-LE-NEXT: movt r1, :upper16:var32
+; CHECK-LE-NEXT: str r0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: arg_longint:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: movw r0, :lower16:var32
+; CHECK-BE-NEXT: movt r0, :upper16:var32
+; CHECK-BE-NEXT: str r1, [r0]
+; CHECK-BE-NEXT: bx lr
+  %tmp = trunc i64 %val to i32
   store i32 %tmp, i32* @var32
   ret void
 }
 
 define void @arg_double( double %val ) {
 ; CHECK-LABEL: arg_double:
-; CHECK: strd r0, r1, [r2]
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r2, :lower16:vardouble
+; CHECK-NEXT: movt r2, :upper16:vardouble
+; CHECK-NEXT: strd r0, r1, [r2]
+; CHECK-NEXT: bx lr
   store double %val, double* @vardouble
   ret void
 }
 
 define void @arg_v4i32(<4 x i32> %vec ) {
-; CHECK-LABEL: arg_v4i32:
-; CHECK-LE: vmov {{d[0-9]+}}, r2, r3
-; CHECK-LE: vmov [[ARG_V4I32_REG:d[0-9]+]], r0, r1
-; CHECK-BE: vmov {{d[0-9]+}}, r3, r2
-; CHECK-BE: vmov [[ARG_V4I32_REG:d[0-9]+]], r1, r0
-; CHECK: vst1.32 {[[ARG_V4I32_REG]][0]}, [r0:32]
+; CHECK-LE-LABEL: arg_v4i32:
+; CHECK-LE: @ %bb.0:
+; CHECK-LE-NEXT: vmov d16, r0, r1
+; CHECK-LE-NEXT: movw r0, :lower16:var32
+; CHECK-LE-NEXT: movt r0, :upper16:var32
+; CHECK-LE-NEXT: vst1.32 {d16[0]}, [r0:32]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: arg_v4i32:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: vmov d16, r1, r0
+; CHECK-BE-NEXT: movw r0, :lower16:var32
+; CHECK-BE-NEXT: movt r0, :upper16:var32
+; CHECK-BE-NEXT: vrev64.32 q8, q8
+; CHECK-BE-NEXT: vst1.32 {d16[0]}, [r0:32]
+; CHECK-BE-NEXT: bx lr
   %tmp = extractelement <4 x i32> %vec, i32 0
   store i32 %tmp, i32* @var32
   ret void
@@ -34,78 +59,220 @@
 
 define void @arg_v2f64(<2 x double> %vec ) {
 ; CHECK-LABEL: arg_v2f64:
-; CHECK: strd r0, r1, [r2]
+; CHECK: @ %bb.0:
+; CHECK-NEXT: movw r2, :lower16:vardouble
+; CHECK-NEXT: movt r2, :upper16:vardouble
+; CHECK-NEXT: strd r0, r1, [r2]
+; CHECK-NEXT: bx lr
   %tmp = extractelement <2 x double> %vec, i32 0
   store double %tmp, double* @vardouble
   ret void
 }
 
 define i64 @return_longint() {
-; CHECK-LABEL: return_longint:
-; CHECK-LE: mov r0, #42
-; CHECK-LE: mov r1, #0
-; CHECK-BE: mov r0, #0
-; CHECK-BE: mov r1, #42
+; CHECK-LE-LABEL: return_longint:
+; CHECK-LE: @ %bb.0:
+; CHECK-LE-NEXT: mov r0, #42
+; CHECK-LE-NEXT: mov r1, #0
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: return_longint:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: mov r0, #0
+; CHECK-BE-NEXT: mov r1, #42
+; CHECK-BE-NEXT: bx lr
   ret i64 42
 }
 
 define double @return_double() {
-; CHECK-LABEL: return_double:
-; CHECK-LE: vmov r0, r1, {{d[0-9]+}}
-; CHECK-BE: vmov r1, r0, {{d[0-9]+}}
+; CHECK-LE-LABEL: return_double:
+; CHECK-LE: @ %bb.0:
+; CHECK-LE-NEXT: vmov.f64 d16, #1.000000e+00
+; CHECK-LE-NEXT: vmov r0, r1, d16
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: return_double:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: vmov.f64 d16, #1.000000e+00
+; CHECK-BE-NEXT: vmov r1, r0, d16
+; CHECK-BE-NEXT: bx lr
   ret double 1.0
 }
 
 define <4 x i32> @return_v4i32() {
-; CHECK-LABEL: return_v4i32:
-; CHECK-LE: vmov r0, r1, {{d[0-9]+}}
-; CHECK-LE: vmov r2, r3, {{d[0-9]+}}
-; CHECK-BE: vmov r1, r0, {{d[0-9]+}}
-; CHECK-BE: vmov r3, r2, {{d[0-9]+}}
+; CHECK-LE-LABEL: return_v4i32:
+; CHECK-LE: @ %bb.0:
+; CHECK-LE-NEXT: adr r0, .LCPI6_0
+; CHECK-LE-NEXT: vld1.64 {d16, d17}, [r0:128]
+; CHECK-LE-NEXT: vmov r0, r1, d16
+; CHECK-LE-NEXT: vmov r2, r3, d17
+; CHECK-LE-NEXT: bx lr
+; CHECK-LE-NEXT: .p2align 4
+; CHECK-LE-NEXT: @ %bb.1:
+; CHECK-LE-NEXT: .LCPI6_0:
+; CHECK-LE-NEXT: .long 42 @ double 9.1245819032257467E-313
+; CHECK-LE-NEXT: .long 43
+; CHECK-LE-NEXT: .long 44 @ double 9.5489810615176143E-313
+; CHECK-LE-NEXT: .long 45
+;
+; CHECK-BE-LABEL: return_v4i32:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: adr r0, .LCPI6_0
+; CHECK-BE-NEXT: vld1.64 {d16, d17}, [r0:128]
+; CHECK-BE-NEXT: vmov r1, r0, d16
+; CHECK-BE-NEXT: vmov r3, r2, d17
+; CHECK-BE-NEXT: bx lr
+; CHECK-BE-NEXT: .p2align 4
+; CHECK-BE-NEXT: @ %bb.1:
+; CHECK-BE-NEXT: .LCPI6_0:
+; CHECK-BE-NEXT: .long 42 @ double 8.912382324178626E-313
+; CHECK-BE-NEXT: .long 43
+; CHECK-BE-NEXT: .long 44 @ double 9.3367814824704935E-313
+; CHECK-BE-NEXT: .long 45
   ret < 4 x i32> < i32 42, i32 43, i32 44, i32 45 >
 }
 
 define <2 x double> @return_v2f64() {
-; CHECK-LABEL: return_v2f64:
-; CHECK-LE: vmov r0, r1, {{d[0-9]+}}
-; CHECK-LE: vmov r2, r3, {{d[0-9]+}}
-; CHECK-BE: vmov r1, r0, {{d[0-9]+}}
-; CHECK-BE: vmov r3, r2, {{d[0-9]+}}
+; CHECK-LE-LABEL: return_v2f64:
+; CHECK-LE: @ %bb.0:
+; CHECK-LE-NEXT: vldr d16, .LCPI7_0
+; CHECK-LE-NEXT: vldr d17, .LCPI7_1
+; CHECK-LE-NEXT: vmov r0, r1, d16
+; CHECK-LE-NEXT: vmov r2, r3, d17
+; CHECK-LE-NEXT: bx lr
+; CHECK-LE-NEXT: .p2align 3
+; CHECK-LE-NEXT: @ %bb.1:
+; CHECK-LE-NEXT: .LCPI7_0:
+; CHECK-LE-NEXT: .long 1374389535 @ double 3.1400000000000001
+; CHECK-LE-NEXT: .long 1074339512
+; CHECK-LE-NEXT: .LCPI7_1:
+; CHECK-LE-NEXT: .long 1374389535 @ double 6.2800000000000002
+; CHECK-LE-NEXT: .long 1075388088
+;
+; CHECK-BE-LABEL: return_v2f64:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: vldr d16, .LCPI7_0
+; CHECK-BE-NEXT: vldr d17, .LCPI7_1
+; CHECK-BE-NEXT: vmov r1, r0, d16
+; CHECK-BE-NEXT: vmov r3, r2, d17
+; CHECK-BE-NEXT: bx lr
+; CHECK-BE-NEXT: .p2align 3
+; CHECK-BE-NEXT: @ %bb.1:
+; CHECK-BE-NEXT: .LCPI7_0:
+; CHECK-BE-NEXT: .long 1074339512 @ double 3.1400000000000001
+; CHECK-BE-NEXT: .long 1374389535
+; CHECK-BE-NEXT: .LCPI7_1:
+; CHECK-BE-NEXT: .long 1075388088 @ double 6.2800000000000002
+; CHECK-BE-NEXT: .long 1374389535
   ret <2 x double> < double 3.14, double 6.28 >
 }
 
 define void @caller_arg_longint() {
-; CHECK-LABEL: caller_arg_longint:
-; CHECK-LE: mov r0, #42
-; CHECK-LE: mov r1, #0
-; CHECK-BE: mov r0, #0
-; CHECK-BE: mov r1, #42
+; CHECK-LE-LABEL: caller_arg_longint:
+; CHECK-LE: @ %bb.0:
+; CHECK-LE-NEXT: .save {r11, lr}
+; CHECK-LE-NEXT: push {r11, lr}
+; CHECK-LE-NEXT: mov r0, #42
+; CHECK-LE-NEXT: mov r1, #0
+; CHECK-LE-NEXT: bl arg_longint
+; CHECK-LE-NEXT: pop {r11, pc}
+;
+; CHECK-BE-LABEL: caller_arg_longint:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: .save {r11, lr}
+; CHECK-BE-NEXT: push {r11, lr}
+; CHECK-BE-NEXT: mov r0, #0
+; CHECK-BE-NEXT: mov r1, #42
+; CHECK-BE-NEXT: bl arg_longint
+; CHECK-BE-NEXT: pop {r11, pc}
   call void @arg_longint( i64 42 )
   ret void
 }
 
 define void @caller_arg_double() {
-; CHECK-LABEL: caller_arg_double:
-; CHECK-LE: vmov r0, r1, {{d[0-9]+}}
-; CHECK-BE: vmov r1, r0, {{d[0-9]+}}
+; CHECK-LE-LABEL: caller_arg_double:
+; CHECK-LE: @ %bb.0:
+; CHECK-LE-NEXT: .save {r11, lr}
+; CHECK-LE-NEXT: push {r11, lr}
+; CHECK-LE-NEXT: vmov.f64 d16, #1.000000e+00
+; CHECK-LE-NEXT: vmov r0, r1, d16
+; CHECK-LE-NEXT: bl arg_double
+; CHECK-LE-NEXT: pop {r11, pc}
+;
+; CHECK-BE-LABEL: caller_arg_double:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: .save {r11, lr}
+; CHECK-BE-NEXT: push {r11, lr}
+; CHECK-BE-NEXT: vmov.f64 d16, #1.000000e+00
+; CHECK-BE-NEXT: vmov r1, r0, d16
+; CHECK-BE-NEXT: bl arg_double
+; CHECK-BE-NEXT: pop {r11, pc}
   call void @arg_double( double 1.0 )
   ret void
 }
 
 define void @caller_return_longint() {
-; CHECK-LABEL: caller_return_longint:
-; CHECK-LE: str r0, [r1]
-; CHECK-BE: str r1, [r0]
+; CHECK-LE-LABEL: caller_return_longint:
+; CHECK-LE: @ %bb.0:
+; CHECK-LE-NEXT: .save {r11, lr}
+; CHECK-LE-NEXT: push {r11, lr}
+; CHECK-LE-NEXT: bl return_longint
+; CHECK-LE-NEXT: movw r1, :lower16:var32
+; CHECK-LE-NEXT: movt r1, :upper16:var32
+; CHECK-LE-NEXT: str r0, [r1]
+; CHECK-LE-NEXT: pop {r11, pc}
+;
+; CHECK-BE-LABEL: caller_return_longint:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: .save {r11, lr}
+; CHECK-BE-NEXT: push {r11, lr}
+; CHECK-BE-NEXT: bl return_longint
+; CHECK-BE-NEXT: movw r0, :lower16:var32
+; CHECK-BE-NEXT: movt r0, :upper16:var32
+; CHECK-BE-NEXT: str r1, [r0]
+; CHECK-BE-NEXT: pop {r11, pc}
   %val = call i64 @return_longint()
-   %tmp = trunc i64 %val to i32
+  %tmp = trunc i64 %val to i32
   store i32 %tmp, i32* @var32
   ret void
 }
 
 define void @caller_return_double() {
-; CHECK-LABEL: caller_return_double:
-; CHECK-LE: vmov {{d[0-9]+}}, r0, r1
-; CHECK-BE: vmov {{d[0-9]+}}, r1, r0
+; CHECK-LE-LABEL: caller_return_double:
+; CHECK-LE: @ %bb.0:
+; CHECK-LE-NEXT: .save {r11, lr}
+; CHECK-LE-NEXT: push {r11, lr}
+; CHECK-LE-NEXT: bl return_double
+; CHECK-LE-NEXT: vmov d17, r0, r1
+; CHECK-LE-NEXT: vldr d16, .LCPI11_0
+; CHECK-LE-NEXT: movw r0, :lower16:vardouble
+; CHECK-LE-NEXT: vadd.f64 d16, d17, d16
+; CHECK-LE-NEXT: movt r0, :upper16:vardouble
+; CHECK-LE-NEXT: vstr d16, [r0]
+; CHECK-LE-NEXT: pop {r11, pc}
+; CHECK-LE-NEXT: .p2align 3
+; CHECK-LE-NEXT: @ %bb.1:
+; CHECK-LE-NEXT: .LCPI11_0:
+; CHECK-LE-NEXT: .long 1374389535 @ double 3.1400000000000001
+; CHECK-LE-NEXT: .long 1074339512
+;
+; CHECK-BE-LABEL: caller_return_double:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: .save {r11, lr}
+; CHECK-BE-NEXT: push {r11, lr}
+; CHECK-BE-NEXT: bl return_double
+; CHECK-BE-NEXT: vmov d17, r1, r0
+; CHECK-BE-NEXT: vldr d16, .LCPI11_0
+; CHECK-BE-NEXT: movw r0, :lower16:vardouble
+; CHECK-BE-NEXT: vadd.f64 d16, d17, d16
+; CHECK-BE-NEXT: movt r0, :upper16:vardouble
+; CHECK-BE-NEXT: vstr d16, [r0]
+; CHECK-BE-NEXT: pop {r11, pc}
+; CHECK-BE-NEXT: .p2align 3
+; CHECK-BE-NEXT: @ %bb.1:
+; CHECK-BE-NEXT: .LCPI11_0:
+; CHECK-BE-NEXT: .long 1074339512 @ double 3.1400000000000001
+; CHECK-BE-NEXT: .long 1374389535
   %val = call double @return_double( )
   %tmp = fadd double %val, 3.14
   store double %tmp, double* @vardouble
@@ -114,7 +281,14 @@
 
 define void @caller_return_v2f64() {
 ; CHECK-LABEL: caller_return_v2f64:
-; CHECK: strd r0, r1, [r2]
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl return_v2f64
+; CHECK-NEXT: movw r2, :lower16:vardouble
+; CHECK-NEXT: movt r2, :upper16:vardouble
+; CHECK-NEXT: strd r0, r1, [r2]
+; CHECK-NEXT: pop {r11, pc}
   %val = call <2 x double> @return_v2f64( )
   %tmp = extractelement <2 x double> %val, i32 0
   store double %tmp, double* @vardouble
Index: test/CodeGen/SystemZ/vec-combine-01.ll
===================================================================
--- test/CodeGen/SystemZ/vec-combine-01.ll
+++ test/CodeGen/SystemZ/vec-combine-01.ll
@@ -43,12 +43,14 @@
 
 ; ...and again in a case where there's also a splat and a bitcast.
 define i16 @f3(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
 ; CHECK-LABEL: f3:
-; CHECK-NOT: vrepg
-; CHECK-NOT: vpk
-; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
-; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6
-; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vaf %v0, %v24, %v26
+; CHECK-NEXT: vrepf %v0, %v0, 3
+; CHECK-NEXT: vlgvh %r0, %v0, 2
+; CHECK-NEXT: vlgvh %r2, %v28, 3
+; CHECK-NEXT: ar %r2, %r0
+; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT: br %r14
   %add = add <4 x i32> %v1, %v2
   %splat = shufflevector <2 x i64> %v3, <2 x i64> undef, <2 x i32>
@@ -87,12 +89,14 @@
 
 ; ...and again with a merge high.
 define i16 @f5(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
 ; CHECK-LABEL: f5:
-; CHECK-NOT: vrepg
-; CHECK-NOT: vmr
-; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
-; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 2
-; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
-; CHECK: br %r14
+; CHECK: # %bb.0:
+; CHECK-NEXT: vaf %v0, %v24, %v26
+; CHECK-NEXT: vrepf %v0, %v0, 1
+; CHECK-NEXT: vlgvh %r0, %v0, 4
+; CHECK-NEXT: vlgvh %r2, %v28, 3
+; CHECK-NEXT: ar %r2, %r0
+; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT: br %r14
   %add = add <4 x i32> %v1, %v2
   %splat = shufflevector <2 x i64> %v3, <2 x i64> undef, <2 x i32>
Index: test/CodeGen/X86/dagcombine-cse.ll
===================================================================
--- test/CodeGen/X86/dagcombine-cse.ll
+++ test/CodeGen/X86/dagcombine-cse.ll
@@ -31,7 +31,6 @@
 ; X64-NEXT: shlq $32, %rcx
 ; X64-NEXT: orq %rax, %rcx
 ; X64-NEXT: movq %rcx, %xmm0
-; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
 ; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
 ; X64-NEXT: movd %xmm0, %eax
 ; X64-NEXT: retq
Index: test/CodeGen/X86/extractelement-load.ll
===================================================================
--- test/CodeGen/X86/extractelement-load.ll
+++ test/CodeGen/X86/extractelement-load.ll
@@ -85,8 +85,7 @@
 ; X32-SSE2-LABEL: t4:
 ; X32-SSE2: # %bb.0:
 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: movapd (%eax), %xmm0
-; X32-SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X32-SSE2-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
 ; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X32-SSE2-NEXT: movd %xmm1, %eax
 ; X32-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
Index: test/CodeGen/X86/known-bits-vector.ll
===================================================================
--- test/CodeGen/X86/known-bits-vector.ll
+++ test/CodeGen/X86/known-bits-vector.ll
@@ -24,10 +24,9 @@
 ; X32-LABEL: knownbits_mask_extract_uitofp:
 ; X32: # %bb.0:
 ; X32-NEXT: pushl %eax
-; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
+; X32-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; X32-NEXT: vmovd %xmm0, %eax
-; X32-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
+; X32-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
 ; X32-NEXT: vmovss %xmm0, (%esp)
 ; X32-NEXT: flds (%esp)
 ; X32-NEXT: popl %eax
@@ -35,10 +34,9 @@
 ;
 ; X64-LABEL: knownbits_mask_extract_uitofp:
 ; X64: # %bb.0:
-; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
+; X64-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; X64-NEXT: vmovq %xmm0, %rax
-; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
+; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
 ; X64-NEXT: retq
   %1 = and <2 x i64> %a0,
   %2 = extractelement <2 x i64> %1, i32 0
Index: test/CodeGen/X86/oddshuffles.ll
===================================================================
--- test/CodeGen/X86/oddshuffles.ll
+++ test/CodeGen/X86/oddshuffles.ll
@@ -68,41 +68,29 @@
 define void @v3i32(<2 x i32> %a, <2 x i32> %b, <3 x i32>* %p) nounwind {
 ; SSE2-LABEL: v3i32:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, 8(%rdi)
-; SSE2-NEXT: movq %xmm2, (%rdi)
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movd %xmm2, 8(%rdi)
+; SSE2-NEXT: movq %xmm0, (%rdi)
 ; SSE2-NEXT: retq
 ;
 ; SSE42-LABEL: v3i32:
 ; SSE42: # %bb.0:
-; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
-; SSE42-NEXT: pextrd $2, %xmm0, 8(%rdi)
-; SSE42-NEXT: movq %xmm1, (%rdi)
+; SSE42-NEXT: extractps $2, %xmm0, 8(%rdi)
+; SSE42-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE42-NEXT: movlps %xmm0, (%rdi)
 ; SSE42-NEXT: retq
 ;
-; AVX1-LABEL: v3i32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; AVX1-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX1-NEXT: vextractps $2, %xmm0, 8(%rdi)
-; AVX1-NEXT: vmovlps %xmm1, (%rdi)
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: v3i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vbroadcastss %xmm1, %xmm1
-; AVX2-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX2-NEXT: vextractps $2, %xmm0, 8(%rdi)
-; AVX2-NEXT: vmovlps %xmm1, (%rdi)
-; AVX2-NEXT: retq
+; AVX-LABEL: v3i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX-NEXT: vextractps $2, %xmm0, 8(%rdi)
+; AVX-NEXT: vmovlps %xmm1, (%rdi)
+; AVX-NEXT: retq
 ;
 ; XOP-LABEL: v3i32:
 ; XOP: # %bb.0:
-; XOP-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; XOP-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
+; XOP-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; XOP-NEXT: vextractps $2, %xmm0, 8(%rdi)
 ; XOP-NEXT: vmovlps %xmm1, (%rdi)
 ; XOP-NEXT: retq
@@ -114,10 +102,9 @@
 define void @v5i16(<4 x i16> %a, <4 x i16> %b, <5 x i16>* %p) nounwind {
 ; SSE2-LABEL: v5i16:
 ; SSE2: # %bb.0:
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; SSE2-NEXT: pextrw $6, %xmm0, %eax
 ; SSE2-NEXT: movw %ax, 8(%rdi)
@@ -126,10 +113,9 @@
 ;
 ; SSE42-LABEL: v5i16:
 ; SSE42: # %bb.0:
+; SSE42-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
 ; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
 ; SSE42-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
-; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
 ; SSE42-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; SSE42-NEXT: pextrw $6, %xmm0, 8(%rdi)
 ; SSE42-NEXT: movq %xmm2, (%rdi)
@@ -137,10 +123,9 @@
 ;
 ; AVX1-LABEL: v5i16:
 ; AVX1: # %bb.0:
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; AVX1-NEXT: vpextrw $6, %xmm0, 8(%rdi)
 ; AVX1-NEXT: vmovq %xmm1, (%rdi)
@@ -148,10 +133,9 @@
 ;
 ; AVX2-SLOW-LABEL: v5i16:
 ; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
-; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
-; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
 ; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; AVX2-SLOW-NEXT: vpextrw $6, %xmm0, 8(%rdi)
 ; AVX2-SLOW-NEXT: vmovq %xmm1, (%rdi)
@@ -160,7 +144,7 @@
 ; AVX2-FAST-LABEL: v5i16:
 ; AVX2-FAST: # %bb.0:
 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,8,9,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,4,5,12,13,14,15,8,9,10,11,12,13,14,15]
+; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX2-FAST-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; AVX2-FAST-NEXT: vpextrw $6, %xmm0, 8(%rdi)
 ; AVX2-FAST-NEXT: vmovq %xmm1, (%rdi)
@@ -168,7 +152,7 @@
 ;
 ; XOP-LABEL: v5i16:
 ; XOP: # %bb.0:
-; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1],xmm1[4,5],xmm0[4,5],xmm1[8,9],xmm0[12,13],xmm1[4,5],xmm0[14,15],xmm1[6,7]
+; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1],xmm1[4,5],xmm0[4,5],xmm1[8,9],xmm0[4,5],xmm1[4,5],xmm0[6,7],xmm1[6,7]
 ; XOP-NEXT: vpextrw $6, %xmm0, 8(%rdi)
 ; XOP-NEXT: vmovq %xmm1, (%rdi)
 ; XOP-NEXT: retq
@@ -377,23 +361,24 @@
 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,2,2]
 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,1,0,3]
 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[3,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; SSE2-NEXT: movd %xmm1, 24(%rdi)
-; SSE2-NEXT: movlps %xmm0, 16(%rdi)
+; SSE2-NEXT: movq %xmm0, 16(%rdi)
 ; SSE2-NEXT: movdqa %xmm3, (%rdi)
 ; SSE2-NEXT: retq
 ;
 ; SSE42-LABEL: v7i32:
 ; SSE42: # %bb.0:
-; SSE42-NEXT: movdqa %xmm1, %xmm2
-; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3],xmm2[4,5,6,7]
-; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
-; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,3,2]
-; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,0,3]
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
+; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,3,2]
+; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
 ; SSE42-NEXT: movd %xmm1, 24(%rdi)
-; SSE42-NEXT: movq %xmm2, 16(%rdi)
-; SSE42-NEXT: movdqa %xmm0, (%rdi)
+; SSE42-NEXT: movq %xmm0, 16(%rdi)
+; SSE42-NEXT: movdqa %xmm2, (%rdi)
 ; SSE42-NEXT: retq
 ;
 ; AVX1-LABEL: v7i32:
Index: test/CodeGen/X86/scalar_widen_div.ll
===================================================================
--- test/CodeGen/X86/scalar_widen_div.ll
+++ test/CodeGen/X86/scalar_widen_div.ll
@@ -427,7 +427,6 @@
 ; CHECK-NEXT: pextrd $2, %xmm1, %r8d
 ; CHECK-NEXT: cltd
 ; CHECK-NEXT: idivl %r8d
-; CHECK-NEXT: pinsrd $2, %eax, %xmm2
 ; CHECK-NEXT: movl %eax, 8(%rdi,%rcx)
 ; CHECK-NEXT: movq %xmm2, (%rdi,%rcx)
 ; CHECK-NEXT: addq $16, %rcx
Index: test/CodeGen/X86/vec_shift7.ll
===================================================================
--- test/CodeGen/X86/vec_shift7.ll
+++ test/CodeGen/X86/vec_shift7.ll
@@ -7,12 +7,9 @@
 define i64 @test1(<2 x i64> %a) {
 ; X32-LABEL: test1:
 ; X32: # %bb.0: # %entry
-; X32-NEXT: movdqa %xmm0, %xmm1
-; X32-NEXT: psllq $2, %xmm1
-; X32-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
-; X32-NEXT: movd %xmm1, %edx
 ; X32-NEXT: movd %xmm0, %eax
+; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X32-NEXT: movd %xmm0, %edx
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test1: