diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -581,6 +581,7 @@ SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); + SDValue tryCombineHWordByteSwaps(SDNode *N); SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, bool HasPos, unsigned PosOpcode, unsigned NegOpcode, @@ -6773,6 +6774,174 @@ return SDValue(); } +static SDValue matchTwoLevelOrAnd(SelectionDAG &DAG, SDNode *N, SDValue N0, + EVT VT) { + // Confirm N opcode is an OR + if (N->getOpcode() != ISD::OR) + return SDValue(); + + SDValue OuterAnd, Inner, Outer; + + // Confirm one operand of N is a OR and other an AND + // or(or(),and) + if (N->getOperand(0).getOpcode() == ISD::OR && + N->getOperand(1).getOpcode() == ISD::AND) + OuterAnd = N->getOperand(1); + // or(and,or()) + else if (N->getOperand(1).getOpcode() == ISD::OR && + N->getOperand(0).getOpcode() == ISD::AND) + OuterAnd = N->getOperand(0); + else + return SDValue(); + + // Check N0 has one operand AND and its first argument matches the AND operand + // of N. + int pos; + // or(or(anda, x), anda) -> or(anda, x) + // or(anda, or(anda , x)) -> or(anda, x) + if (N0.getOperand(0).getOpcode() == ISD::AND && + N0.getOperand(0).getOperand(0) == OuterAnd.getOperand(0)) + pos = 0; + // or(or(x, anda), anda) -> or(x, anda) + // or(anda, or(x, anda)) -> or(x, anda) + else if (N0.getOperand(1).getOpcode() == ISD::AND && + N0.getOperand(1).getOperand(0) == OuterAnd.getOperand(0)) + pos = 1; + else + return SDValue(); + + // Combine masks + ConstantSDNode *N1C = dyn_cast(OuterAnd.getOperand(1)); + if (!N1C) + return SDValue(); + unsigned long long Mask = N1C->getZExtValue(); + N1C = dyn_cast(N0.getOperand(pos).getOperand(1)); + if (!N1C) + return SDValue(); + Mask = Mask | N1C->getZExtValue(); + SDLoc DL(N); + + Inner = DAG.getNode(ISD::AND, DL, VT, OuterAnd.getOperand(0), + DAG.getConstant((uint64_t)Mask, DL, VT)); + + if (pos == 0) + Outer = DAG.getNode(ISD::OR, DL, VT, Inner, N0.getOperand(1)); + else + Outer = DAG.getNode(ISD::OR, DL, VT, N0.getOperand(0), Inner); + return Outer; +} + +static SDValue matchThreeLevelOrAnd(SelectionDAG &DAG, SDNode *N, SDValue N0, + SDValue N1, EVT VT) { + // Confirm N opcode is an OR + if (N->getOpcode() != ISD::OR) + return SDValue(); + + SDValue OuterAnd, Inner, Outer; + + // Confirm one operand of N is a OR and other an AND + // or(or(),and) + if (N->getOperand(0).getOpcode() == ISD::OR && + N->getOperand(1).getOpcode() == ISD::AND) + OuterAnd = N->getOperand(1); + // or(and,or()) + else if (N->getOperand(1).getOpcode() == ISD::OR && + N->getOperand(0).getOpcode() == ISD::AND) + OuterAnd = N->getOperand(0); + else + return SDValue(); + + // Check N0 has one operand OR and check its position. + int pos0; + // or(or(or(),x),and) + // or(and,or(or(),x)) + if (N0.getOperand(0).getOpcode() == ISD::OR) + pos0 = 0; + // or(or(x,or()),and) + // or(and,or(x,or())) + else if (N0.getOperand(1).getOpcode() == ISD::OR) + pos0 = 1; + else + return SDValue(); + + // Check N1 has one operand AND and its first argument matches the AND operand + // of N. + int pos; + // or(or(or(anda,y),x),anda) -> or(or(anda,y),x) + // or(anda,or(or(anda,y),x)) -> or(or(anda,y),x) + // or(or(x,or(anda,y)),anda) -> or(x,or(anda,y) + // or(anda,or(x,or(anda,y))) -> or(x,or(anda,y) + if (N1.getOperand(0).getOpcode() == ISD::AND && + N1.getOperand(0).getOperand(0) == OuterAnd.getOperand(0)) + pos = 0; + // or(or(or(y,anda),x),anda) -> or(or(y,anda),x) + // or(anda,or(or(y,anda),x)) -> or(or(y,anda),x) + // or(or(x,or(y,anda)),anda) -> or(x,or(y,anda) + // or(anda,or(x,or(y,anda))) -> or(x,or(y,anda) + else if (N1.getOperand(1).getOpcode() == ISD::AND && + N1.getOperand(1).getOperand(0) == OuterAnd.getOperand(0)) + pos = 1; + else + return SDValue(); + + // Combine masks + ConstantSDNode *N1C = dyn_cast(OuterAnd.getOperand(1)); + if (!N1C) + return SDValue(); + unsigned long long Mask = N1C->getZExtValue(); + N1C = dyn_cast(N1.getOperand(pos).getOperand(1)); + if (!N1C) + return SDValue(); + Mask = Mask | N1C->getZExtValue(); + SDLoc DL(N); + + // Construct DAG + Inner = DAG.getNode(ISD::AND, DL, VT, OuterAnd.getOperand(0), + DAG.getConstant((uint64_t)Mask, DL, VT)); + if (pos == 0) + Outer = DAG.getNode(ISD::OR, DL, VT, Inner, N1.getOperand(1)); + else + Outer = DAG.getNode(ISD::OR, DL, VT, N1.getOperand(0), Inner); + + if (pos0 == 0) + Outer = DAG.getNode(ISD::OR, DL, VT, Outer, N0.getOperand(1)); + else + Outer = DAG.getNode(ISD::OR, DL, VT, N0.getOperand(0), Outer); + return Outer; +} + +SDValue DAGCombiner::tryCombineHWordByteSwaps(SDNode *N) { + + if (!LegalOperations) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i64) + return SDValue(); + SDValue N0, N1; + + if (N->getOpcode() != ISD::OR) + return SDValue(); + + if (N->getOperand(0).getOpcode() == ISD::OR) + N0 = N->getOperand(0); + else if (N->getOperand(1).getOpcode() == ISD::OR) + N0 = N->getOperand(1); + else + return SDValue(); + + if (N0.getOperand(0).getOpcode() == ISD::OR) + N1 = N0.getOperand(0); + else if (N0.getOperand(1).getOpcode() == ISD::OR) + N1 = N0.getOperand(1); + else + // Check if it is two level + return matchTwoLevelOrAnd(DAG, N, N0, VT); + + // Check if it is three level + return matchThreeLevelOrAnd(DAG, N, N0, N1, VT); +} + /// OR combines for which the commuted variant will be tried as well. static SDValue visitORCommutative( SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) { @@ -6935,6 +7104,10 @@ if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1)) return BSwap; + // Simplify halfword bswaps in 64bit. + if (SDValue Combined = tryCombineHWordByteSwaps(N)) + return Combined; + // reassociate or if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags())) return ROR; diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll --- a/llvm/test/CodeGen/AArch64/arm64-rev.ll +++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll @@ -703,3 +703,146 @@ %4 = or i64 %1, %3 ret i64 %4 } + +; Optimize pattern with multiple and/or to a simple pattern which can enable generation of rev16. +define i64 @test_rev16_x_hwbyteswaps_complex1(i64 %a) nounwind { +; CHECK-LABEL: test_rev16_x_hwbyteswaps_complex1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rev16 x0, x0 +; CHECK-NEXT: ret +; +; GISEL-LABEL: test_rev16_x_hwbyteswaps_complex1: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: lsr x8, x0, #8 +; GISEL-NEXT: lsl x9, x0, #8 +; GISEL-NEXT: and x10, x8, #0xff000000000000 +; GISEL-NEXT: and x11, x9, #0xff00000000000000 +; GISEL-NEXT: orr x10, x10, x11 +; GISEL-NEXT: and x11, x8, #0xff00000000 +; GISEL-NEXT: orr x10, x10, x11 +; GISEL-NEXT: and x11, x9, #0xff0000000000 +; GISEL-NEXT: orr x10, x10, x11 +; GISEL-NEXT: and x11, x8, #0xff0000 +; GISEL-NEXT: orr x10, x10, x11 +; GISEL-NEXT: and x11, x9, #0xff000000 +; GISEL-NEXT: orr x10, x10, x11 +; GISEL-NEXT: and x8, x8, #0xff +; GISEL-NEXT: orr x8, x10, x8 +; GISEL-NEXT: and x9, x9, #0xff00 +; GISEL-NEXT: orr x0, x8, x9 +; GISEL-NEXT: ret +entry: + %0 = lshr i64 %a, 8 + %1 = and i64 %0, 71776119061217280 + %2 = shl i64 %a, 8 + %3 = and i64 %2, -72057594037927936 + %4 = or i64 %1, %3 + %5 = and i64 %0, 1095216660480 + %6 = or i64 %4, %5 + %7 = and i64 %2, 280375465082880 + %8 = or i64 %6, %7 + %9 = and i64 %0, 16711680 + %10 = or i64 %8, %9 + %11 = and i64 %2, 4278190080 + %12 = or i64 %10, %11 + %13 = and i64 %0, 255 + %14 = or i64 %12, %13 + %15 = and i64 %2, 65280 + %16 = or i64 %14, %15 + ret i64 %16 +} + +define i64 @test_rev16_x_hwbyteswaps_complex2(i64 %a) nounwind { +; CHECK-LABEL: test_rev16_x_hwbyteswaps_complex2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rev16 x0, x0 +; CHECK-NEXT: ret +; +; GISEL-LABEL: test_rev16_x_hwbyteswaps_complex2: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: lsr x8, x0, #8 +; GISEL-NEXT: lsl x10, x0, #8 +; GISEL-NEXT: and x9, x8, #0xff000000000000 +; GISEL-NEXT: and x11, x8, #0xff00000000 +; GISEL-NEXT: orr x9, x9, x11 +; GISEL-NEXT: and x11, x8, #0xff0000 +; GISEL-NEXT: orr x9, x9, x11 +; GISEL-NEXT: and x8, x8, #0xff +; GISEL-NEXT: orr x8, x9, x8 +; GISEL-NEXT: and x9, x10, #0xff00000000000000 +; GISEL-NEXT: orr x8, x8, x9 +; GISEL-NEXT: and x9, x10, #0xff0000000000 +; GISEL-NEXT: orr x8, x8, x9 +; GISEL-NEXT: and x9, x10, #0xff000000 +; GISEL-NEXT: orr x8, x8, x9 +; GISEL-NEXT: and x9, x10, #0xff00 +; GISEL-NEXT: orr x0, x8, x9 +; GISEL-NEXT: ret +entry: + %0 = lshr i64 %a, 8 + %1 = and i64 %0, 71776119061217280 + %2 = shl i64 %a, 8 + %3 = and i64 %0, 1095216660480 + %4 = or i64 %1, %3 + %5 = and i64 %0, 16711680 + %6 = or i64 %4, %5 + %7 = and i64 %0, 255 + %8 = or i64 %6, %7 + %9 = and i64 %2, -72057594037927936 + %10 = or i64 %8, %9 + %11 = and i64 %2, 280375465082880 + %12 = or i64 %10, %11 + %13 = and i64 %2, 4278190080 + %14 = or i64 %12, %13 + %15 = and i64 %2, 65280 + %16 = or i64 %14, %15 + ret i64 %16 +} + +; Optimize pattern with multiple and/or to a simple pattern which can enable generation of rev16. +define i64 @test_rev16_x_hwbyteswaps_complex3(i64 %a) nounwind { +; CHECK-LABEL: test_rev16_x_hwbyteswaps_complex3: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rev16 x0, x0 +; CHECK-NEXT: ret +; +; GISEL-LABEL: test_rev16_x_hwbyteswaps_complex3: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: lsr x8, x0, #8 +; GISEL-NEXT: lsl x9, x0, #8 +; GISEL-NEXT: and x10, x8, #0xff000000000000 +; GISEL-NEXT: and x11, x9, #0xff00000000000000 +; GISEL-NEXT: orr x10, x11, x10 +; GISEL-NEXT: and x11, x8, #0xff00000000 +; GISEL-NEXT: orr x10, x11, x10 +; GISEL-NEXT: and x11, x9, #0xff0000000000 +; GISEL-NEXT: orr x10, x11, x10 +; GISEL-NEXT: and x11, x8, #0xff0000 +; GISEL-NEXT: orr x10, x11, x10 +; GISEL-NEXT: and x11, x9, #0xff000000 +; GISEL-NEXT: orr x10, x11, x10 +; GISEL-NEXT: and x8, x8, #0xff +; GISEL-NEXT: orr x8, x8, x10 +; GISEL-NEXT: and x9, x9, #0xff00 +; GISEL-NEXT: orr x0, x9, x8 +; GISEL-NEXT: ret +entry: + %0 = lshr i64 %a, 8 + %1 = and i64 %0, 71776119061217280 + %2 = shl i64 %a, 8 + %3 = and i64 %2, -72057594037927936 + %4 = or i64 %3, %1 + %5 = and i64 %0, 1095216660480 + %6 = or i64 %5, %4 + %7 = and i64 %2, 280375465082880 + %8 = or i64 %7, %6 + %9 = and i64 %0, 16711680 + %10 = or i64 %9, %8 + %11 = and i64 %2, 4278190080 + %12 = or i64 %11, %10 + %13 = and i64 %0, 255 + %14 = or i64 %13, %12 + %15 = and i64 %2, 65280 + %16 = or i64 %15, %14 + ret i64 %16 +}