Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13916,26 +13916,17 @@
       if ((MemVT != Val.getValueType()) &&
           (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
            Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
-        SDValue Vec = Val.getOperand(0);
         EVT MemVTScalarTy = MemVT.getScalarType();
-        SDValue Idx = Val.getOperand(1);
         // We may need to add a bitcast here to get types to line up.
-        if (MemVTScalarTy != Vec.getValueType()) {
-          unsigned Elts = Vec.getValueType().getSizeInBits() /
-                          MemVTScalarTy.getSizeInBits();
-          if (Val.getValueType().isVector() && MemVT.isVector()) {
-            unsigned IdxC = cast<ConstantSDNode>(Idx)->getZExtValue();
-            unsigned NewIdx =
-                ((uint64_t)IdxC * MemVT.getVectorNumElements()) / Elts;
-            Idx = DAG.getConstant(NewIdx, SDLoc(Val), Idx.getValueType());
-          }
-          EVT NewVecTy =
-              EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
-          Vec = DAG.getBitcast(NewVecTy, Vec);
+        if (MemVTScalarTy != Val.getValueType().getScalarType()) {
+          Val = DAG.getBitcast(MemVT, Val);
+        } else {
+          unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
+                                          : ISD::EXTRACT_VECTOR_ELT;
+          SDValue Vec = Val.getOperand(0);
+          SDValue Idx = Val.getOperand(1);
+          Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
         }
-        auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
-                                      : ISD::EXTRACT_VECTOR_ELT;
-        Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
       }
       Ops.push_back(Val);
     }
Index: test/CodeGen/AArch64/sdag-store-merging-bug.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/sdag-store-merging-bug.ll
@@ -0,0 +1,20 @@
+; RUN: llc -o - %s -mtriple aarch64-- -mattr +slow-misaligned-128store -stop-after=isel | FileCheck %s
+; Checks for a bug where selection dag store merging would construct wrong
+; indices when extracting values from vectors, resulting in an invalid
+; lane duplication in this case.
+; The only way I could trigger stores with mismatching types getting merged was
+; via the aarch64 slow-misaligned-128store code splitting stores earlier.
+
+; CHECK-LABEL: name: func
+; CHECK: LDRQui
+; CHECK-NOT: INSERT_SUBREG
+; CHECK-NOT: DUP
+; CHECK: STRQui
+define void @func(<2 x double>* %sptr, <2 x double>* %dptr) {
+  %load = load <2 x double>, <2 x double>* %sptr, align 8
+  ; aarch64 feature slow-misaligned-128store splits the following store.
+  ; store merging immediately merges it back together (but used to get the
+  ; merging wrong); this is the only way I was able to reproduce the bug...
+  store <2 x double> %load, <2 x double>* %dptr, align 4
+  ret void
+}
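
Note (not part of the patch): a minimal standalone C++ sketch of the index rescaling the removed code performed, assuming the scenario from the test above, where the slow-misaligned-128store splitting yields two EXTRACT_SUBVECTORs at indices 0 and 1 of a two-element 128-bit source and MemVT ends up as a one-element 64-bit vector type. The concrete bit widths and element counts are illustrative assumptions, not values taken from the compiler.

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Assumed scenario: source vector = <2 x double> (128 bits),
      // MemVT = a one-element vector (1 element), MemVT scalar = 64 bits.
      const unsigned VecBits = 128;
      const unsigned ScalarBits = 64;
      const unsigned MemVTNumElts = 1;

      // Removed code: bitcast the source to <Elts x scalar> and rescale
      // the extract index against that element count.
      const unsigned Elts = VecBits / ScalarBits; // == 2
      for (unsigned IdxC = 0; IdxC < 2; ++IdxC) {
        unsigned NewIdx = (unsigned)(((uint64_t)IdxC * MemVTNumElts) / Elts);
        // Prints NewIdx == 0 for both IdxC == 0 and IdxC == 1, i.e. both
        // halves of the merged store would read lane 0 -- the lane
        // duplication the test's CHECK-NOT: DUP guards against.
        std::printf("old rescaling: IdxC=%u -> NewIdx=%u\n", IdxC, NewIdx);
      }

      // New code path: when the scalar types already match, the patch
      // re-emits the extract with MemVT and the original index, keeping
      // lanes 0 and 1 distinct.
      return 0;
    }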