Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8768,26 +8768,42 @@
   if (VT.isFloatingPoint())
     return SDValue();
 
-  // Check for insert vector elements.
-  if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
-    return SDValue();
-
   // We can express a splat as store pair(s) for 2 or 4 elements.
   unsigned NumVecElts = VT.getVectorNumElements();
   if (NumVecElts != 4 && NumVecElts != 2)
     return SDValue();
-  SDValue SplatVal = StVal.getOperand(1);
-  unsigned RemainInsertElts = NumVecElts - 1;
 
   // Check that this is a splat.
-  while (--RemainInsertElts) {
-    SDValue NextInsertElt = StVal.getOperand(0);
-    if (NextInsertElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
+  // Make sure that each of the relevant vector element locations are inserted
+  // to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
+  std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
+  SDValue SplatVal;
+  for (unsigned I = 0; I < NumVecElts; ++I) {
+    // Check for insert vector elements.
+    if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
       return SDValue();
-    if (NextInsertElt.getOperand(1) != SplatVal)
+
+    // Check that same value is inserted at each vector element.
+    if (I == 0)
+      SplatVal = StVal.getOperand(1);
+    else if (StVal.getOperand(1) != SplatVal)
+      return SDValue();
+
+    // Check insert element index.
+    ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
+    if (!CIndex)
       return SDValue();
-    StVal = NextInsertElt;
+    uint64_t IndexVal = CIndex->getZExtValue();
+    if (IndexVal >= NumVecElts)
+      return SDValue();
+    IndexNotInserted.reset(IndexVal);
+
+    StVal = StVal.getOperand(0);
   }
+  // Check that all vector element locations were inserted to.
+  if (IndexNotInserted.any())
+    return SDValue();
+
   unsigned OrigAlignment = St->getAlignment();
   unsigned EltOffset = NumVecElts == 4 ? 4 : 8;
   unsigned Alignment = std::min(OrigAlignment, EltOffset);
Index: test/CodeGen/AArch64/arm64-stp.ll
===================================================================
--- test/CodeGen/AArch64/arm64-stp.ll
+++ test/CodeGen/AArch64/arm64-stp.ll
@@ -98,6 +98,30 @@
   ret void
 }
 
+; Check that storing a vector built by 4 insertelements does not get split when
+; the vector is not a splat: the first insertelement uses a variable index.
+define void @nosplat_v4i32(i32 %v, i32 *%p) {
+entry:
+
+; CHECK-LABEL: nosplat_v4i32:
+; CHECK: str w0,
+; CHECK: ldr q[[REG1:[0-9]+]],
+; CHECK-DAG: ins v[[REG1]].s[1], w0
+; CHECK-DAG: ins v[[REG1]].s[2], w0
+; CHECK-DAG: ins v[[REG1]].s[3], w0
+; CHECK: ext v[[REG2:[0-9]+]].16b, v[[REG1]].16b, v[[REG1]].16b, #8
+; CHECK: stp d[[REG1]], d[[REG2]], [x1]
+; CHECK: ret
+
+  %p17 = insertelement <4 x i32> undef, i32 %v, i32 %v
+  %p18 = insertelement <4 x i32> %p17, i32 %v, i32 1
+  %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2
+  %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3
+  %p21 = bitcast i32* %p to <4 x i32>*
+  store <4 x i32> %p20, <4 x i32>* %p21, align 4
+  ret void
+}
+
 ; Read of %b to compute %tmp2 shouldn't prevent formation of stp
 ; CHECK-LABEL: stp_int_rar_hazard
 ; CHECK: ldr [[REG:w[0-9]+]], [x2, #8]