Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -155,7 +155,7 @@ void deleteAndRecombine(SDNode *N); bool recursivelyDeleteUnusedNodes(SDNode *N); - + SDValue ReplaceUndefFromZeroOrOneVector(SDValue N, SDLoc DL); SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo = true); @@ -474,6 +474,43 @@ DAG.DeleteNode(N); } +// Replace undef nodes with zero/one nodes in all zero/ones vector. +// Be careful that this function must be used after ISD::isBuildVectorAllZeros +// (N.getNode())/ISD::isBuildVectorAllOnes(N.getNode()). Also this function is +// only used for visitand/visitor now, because for and/or vector1, vector2 +// (assume vector2 to be all zeros/ones), vector2 will be returned in function +// visitand/visitor. Thus the undef nodes in vector2 are not abandoned +// (This is different from other operations, for add vector1, vector2, +// vector1 will be returned, the undef nodes in vector2 are abandoned), this +// will cause the bug described in test/CodeGen/AArch64/aarch64_tree_tests.ll +SDValue DAGCombiner::ReplaceUndefFromZeroOrOneVector(SDValue N, SDLoc DL) { + SmallVector Ops; + SDValue NotUndef; + SDValue Return; + EVT VT = N.getValueType(); + bool isbitcast = false; + assert((ISD::isBuildVectorAllZeros(N.getNode()) || + ISD::isBuildVectorAllOnes(N.getNode())) && + "SDValue N must be AllZeros/AllOnes BuildVector!"); + if (N->getOpcode() == ISD::BITCAST) { + isbitcast = true; + N = N->getOperand(0); + } + for (unsigned i = 0; i != N->getNumOperands(); i++) { + if (N->getOperand(i).getOpcode() != ISD::UNDEF) { + NotUndef = N->getOperand(i); + break; + } + } + for (unsigned i = 0; i != N->getNumOperands(); i++) { + Ops.push_back(NotUndef); + } + Return = DAG.getNode(ISD::BUILD_VECTOR, DL, N.getValueType(), Ops); + if (isbitcast) + return DAG.getNode(ISD::BITCAST, DL, VT, Return); + 
return Return; +} + /// isNegatibleForFree - Return 1 if we can compute the negated form of the /// specified expression for the same cost as the expression itself, or 2 if we /// can compute the negated form more cheaply than the expression itself. @@ -2674,10 +2711,11 @@ // fold (and x, 0) -> 0, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) - return N0; + // do not return N0, because undef node may exist in N0 + return ReplaceUndefFromZeroOrOneVector(N0, SDLoc(N)); if (ISD::isBuildVectorAllZeros(N1.getNode())) - return N1; - + // do not return N1, because undef node may exist in N1 + return ReplaceUndefFromZeroOrOneVector(N1, SDLoc(N)); // fold (and x, -1) -> x, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) return N1; @@ -3317,9 +3355,11 @@ // fold (or x, -1) -> -1, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) - return N0; + // do not return N0, because undef node may exist in N0 + return ReplaceUndefFromZeroOrOneVector(N0, SDLoc(N)); if (ISD::isBuildVectorAllOnes(N1.getNode())) - return N1; + // do not return N1, because undef node may exist in N1 + return ReplaceUndefFromZeroOrOneVector(N1, SDLoc(N)); // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) Index: test/CodeGen/AArch64/aarch64_tree_tests.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/aarch64_tree_tests.ll @@ -0,0 +1,42 @@ +; RUN: llc < %s | FileCheck %s + +; ModuleID = 'aarch64_tree_tests.bc' +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "arm64--linux-gnu" + +; CHECK-LABLE: @aarch64_tree_tests_and +; CHECK: .hword 32768 +; CHECK: .hword 32767 +; CHECK: .hword 4664 +; CHECK: .hword 32767 +; CHECK: .hword 32768 +; CHECK: .hword 32768 +; CHECK: .hword 0 +; CHECK: .hword 0 + +; Function Attrs: nounwind readnone +define <8 x i16> @aarch64_tree_tests_and(<8 x i16> %a) { +entry: + 
%and = and <8 x i16> , %a + %ret = add <8 x i16> %and, + ret <8 x i16> %ret +} + +; CHECK-LABLE: @aarch64_tree_tests_or +; CHECK: .hword 32768 +; CHECK: .hword 32766 +; CHECK: .hword 4664 +; CHECK: .hword 32766 +; CHECK: .hword 32768 +; CHECK: .hword 32768 +; CHECK: .hword 65535 +; CHECK: .hword 65535 + +; Function Attrs: nounwind readnone +define <8 x i16> @aarch64_tree_tests_or(<8 x i16> %a) { +entry: + %or = or <8 x i16> , %a + %ret = add <8 x i16> %or, + ret <8 x i16> %ret +} +