Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -13845,17 +13845,28 @@ Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) { SDValue Vec = Val.getOperand(0); EVT MemVTScalarTy = MemVT.getScalarType(); + SDValue Idx = Val.getOperand(1); // We may need to add a bitcast here to get types to line up. if (MemVTScalarTy != Vec.getValueType()) { unsigned Elts = Vec.getValueType().getSizeInBits() / MemVTScalarTy.getSizeInBits(); + if (Val.getValueType().isVector()) { + unsigned IdxC = + dyn_cast<ConstantSDNode>(Val.getOperand(1))->getZExtValue(); + unsigned NewIdx; + if (Elts > MemVT.getVectorNumElements()) + NewIdx = IdxC / (Elts / MemVT.getVectorNumElements()); + else + NewIdx = IdxC * (MemVT.getVectorNumElements() / Elts); + Idx = DAG.getConstant(NewIdx, SDLoc(Val), Idx.getValueType()); + } EVT NewVecTy = EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts); Vec = DAG.getBitcast(NewVecTy, Vec); } auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR : ISD::EXTRACT_VECTOR_ELT; - Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1)); + Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx); } Ops.push_back(Val); } Index: test/CodeGen/X86/merge-vector-stores-scale-idx-crash.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/merge-vector-stores-scale-idx-crash.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=x86_64-apple-osx10.14 -mattr=+avx2 | FileCheck %s + +; Check that we don't crash due to creating invalid extract_subvector indices in store merging. 
+; CHECK-LABEL: testfn +; CHECK: retq +define void @testfn(i32* nocapture %p) { + %v0 = getelementptr i32, i32* %p, i64 12 + %1 = bitcast i32* %v0 to <2 x i64>* + %2 = bitcast i32* %v0 to <4 x i32>* + %3 = getelementptr <2 x i64>, <2 x i64>* %1, i64 -3 + store <2 x i64> undef, <2 x i64>* %3, align 16 + %4 = shufflevector <4 x i64> zeroinitializer, <4 x i64> undef, <2 x i32> + %5 = getelementptr <2 x i64>, <2 x i64>* %1, i64 -2 + store <2 x i64> %4, <2 x i64>* %5, align 16 + %6 = shufflevector <8 x i32> zeroinitializer, <8 x i32> undef, <4 x i32> + %7 = getelementptr <4 x i32>, <4 x i32>* %2, i64 -1 + store <4 x i32> %6, <4 x i32>* %7, align 16 + ret void +}