Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10376,10 +10376,24 @@ SmallVector Opnds; unsigned BuildVecNumElts = N0.getNumOperands(); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N0.getOperand(i)); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N1.getOperand(i)); + EVT SclTy0 = N0.getOperand(0)->getValueType(0); + EVT SclTy1 = N1.getOperand(0)->getValueType(0); + if (SclTy0.isFloatingPoint()) { + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(N0.getOperand(i)); + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(N1.getOperand(i)); + } else { + // If BUILD_VECTOR are from built from integer, they may have different + // operand types. Get the smaller type and truncate all operands to it. + EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1; + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, + N0.getOperand(i))); + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, + N1.getOperand(i))); + } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } Index: test/CodeGen/AArch64/arm64-build-vector.ll =================================================================== --- test/CodeGen/AArch64/arm64-build-vector.ll +++ test/CodeGen/AArch64/arm64-build-vector.ll @@ -42,4 +42,18 @@ %b = add <8 x i16> %a, %c = mul <8 x i16> %b, ret <8 x i16> %c - } +} + +; There is an optimization in DAG Combiner as following: +; fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) +; -> (BUILD_VECTOR A, B, ..., C, D, ...) +; This case checks when A,B and C,D are different types, there should be no +; assertion failure. +define <8 x i16> @concat_2_build_vector(<4 x i16> %in0) { +; CHECK-LABEL: concat_2_build_vector: +; CHECK: movi + %vshl_n = shl <4 x i16> %in0, + %vshl_n2 = shl <4 x i16> %vshl_n, + %shuffle.i = shufflevector <4 x i16> %vshl_n2, <4 x i16> zeroinitializer, <8 x i32> + ret <8 x i16> %shuffle.i +} \ No newline at end of file