Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -581,6 +581,7 @@ SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); + SDValue tryCombineHWordByteSwaps(SDNode *N); SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, bool HasPos, unsigned PosOpcode, unsigned NegOpcode, @@ -6773,6 +6774,174 @@ return SDValue(); } +static SDValue matchTwoLevelOrAnd(SelectionDAG &DAG, SDNode *N, SDValue N0, + EVT VT) { + // Confirm N opcode is an OR + if (N->getOpcode() != ISD::OR) + return SDValue(); + + SDValue OuterAnd, Inner, Outer; + + // Confirm one operand of N is a OR and other an AND + // or(or(),and) + if (N->getOperand(0).getOpcode() == ISD::OR && + N->getOperand(1).getOpcode() == ISD::AND) + OuterAnd = N->getOperand(1); + // or(and,or()) + else if (N->getOperand(1).getOpcode() == ISD::OR && + N->getOperand(0).getOpcode() == ISD::AND) + OuterAnd = N->getOperand(0); + else + return SDValue(); + + // Check N0 has one operand AND and its first argument matches the AND operand + // of N. + int pos; + // or(or(anda, x), anda) -> or(anda, x) + // or(anda, or(anda , x)) -> or(anda, x) + if (N0.getOperand(0).getOpcode() == ISD::AND && + N0.getOperand(0).getOperand(0) == OuterAnd.getOperand(0)) + pos = 0; + // or(or(x, anda), anda) -> or(x, anda) + // or(anda, or(x, anda)) -> or(x, anda) + else if (N0.getOperand(1).getOpcode() == ISD::AND && + N0.getOperand(1).getOperand(0) == OuterAnd.getOperand(0)) + pos = 1; + else + return SDValue(); + + // Combine masks + ConstantSDNode *N1C = dyn_cast(OuterAnd.getOperand(1)); + if (!N1C) + return SDValue(); + unsigned long long Mask = N1C->getZExtValue(); + N1C = dyn_cast(N0.getOperand(pos).getOperand(1)); + if (!N1C) + return SDValue(); + Mask = Mask | N1C->getZExtValue(); + SDLoc DL(N); + + Inner = DAG.getNode(ISD::AND, DL, VT, OuterAnd.getOperand(0), + DAG.getConstant((uint64_t)Mask, DL, VT)); + + if (pos == 0) + Outer = DAG.getNode(ISD::OR, DL, VT, Inner, N0.getOperand(1)); + else + Outer = DAG.getNode(ISD::OR, DL, VT, N0.getOperand(0), Inner); + return Outer; +} + +static SDValue matchThreeLevelOrAnd(SelectionDAG &DAG, SDNode *N, SDValue N0, + SDValue N1, EVT VT) { + // Confirm N opcode is an OR + if (N->getOpcode() != ISD::OR) + return SDValue(); + + SDValue OuterAnd, Inner, Outer; + + // Confirm one operand of N is a OR and other an AND + // or(or(),and) + if (N->getOperand(0).getOpcode() == ISD::OR && + N->getOperand(1).getOpcode() == ISD::AND) + OuterAnd = N->getOperand(1); + // or(and,or()) + else if (N->getOperand(1).getOpcode() == ISD::OR && + N->getOperand(0).getOpcode() == ISD::AND) + OuterAnd = N->getOperand(0); + else + return SDValue(); + + // Check N0 has one operand OR and check its position. + int pos0; + // or(or(or(),x),and) + // or(and,or(or(),x)) + if (N0.getOperand(0).getOpcode() == ISD::OR) + pos0 = 0; + // or(or(x,or()),and) + // or(and,or(x,or())) + else if (N0.getOperand(1).getOpcode() == ISD::OR) + pos0 = 1; + else + return SDValue(); + + // Check N1 has one operand AND and its first argument matches the AND operand + // of N. + int pos; + // or(or(or(anda,y),x),anda) -> or(or(anda,y),x) + // or(anda,or(or(anda,y),x)) -> or(or(anda,y),x) + // or(or(x,or(anda,y)),anda) -> or(x,or(anda,y) + // or(anda,or(x,or(anda,y))) -> or(x,or(anda,y) + if (N1.getOperand(0).getOpcode() == ISD::AND && + N1.getOperand(0).getOperand(0) == OuterAnd.getOperand(0)) + pos = 0; + // or(or(or(y,anda),x),anda) -> or(or(y,anda),x) + // or(anda,or(or(y,anda),x)) -> or(or(y,anda),x) + // or(or(x,or(y,anda)),anda) -> or(x,or(y,anda) + // or(anda,or(x,or(y,anda))) -> or(x,or(y,anda) + else if (N1.getOperand(1).getOpcode() == ISD::AND && + N1.getOperand(1).getOperand(0) == OuterAnd.getOperand(0)) + pos = 1; + else + return SDValue(); + + // Combine masks + ConstantSDNode *N1C = dyn_cast(OuterAnd.getOperand(1)); + if (!N1C) + return SDValue(); + unsigned long long Mask = N1C->getZExtValue(); + N1C = dyn_cast(N1.getOperand(pos).getOperand(1)); + if (!N1C) + return SDValue(); + Mask = Mask | N1C->getZExtValue(); + SDLoc DL(N); + + // Construct DAG + Inner = DAG.getNode(ISD::AND, DL, VT, OuterAnd.getOperand(0), + DAG.getConstant((uint64_t)Mask, DL, VT)); + if (pos == 0) + Outer = DAG.getNode(ISD::OR, DL, VT, Inner, N1.getOperand(1)); + else + Outer = DAG.getNode(ISD::OR, DL, VT, N1.getOperand(0), Inner); + + if (pos0 == 0) + Outer = DAG.getNode(ISD::OR, DL, VT, Outer, N0.getOperand(1)); + else + Outer = DAG.getNode(ISD::OR, DL, VT, N0.getOperand(0), Outer); + return Outer; +} + +SDValue DAGCombiner::tryCombineHWordByteSwaps(SDNode *N) { + + if (!LegalOperations) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i64) + return SDValue(); + SDValue N0, N1; + + if (N->getOpcode() != ISD::OR) + return SDValue(); + + if (N->getOperand(0).getOpcode() == ISD::OR) + N0 = N->getOperand(0); + else if (N->getOperand(1).getOpcode() == ISD::OR) + N0 = N->getOperand(1); + else + return SDValue(); + + if (N0.getOperand(0).getOpcode() == ISD::OR) + N1 = N0.getOperand(0); + else if (N0.getOperand(1).getOpcode() == ISD::OR) + N1 = N0.getOperand(1); + else + // Check if it is two level + return matchTwoLevelOrAnd(DAG, N, N0, VT); + + // Check if it is three level + return matchThreeLevelOrAnd(DAG, N, N0, N1, VT); +} + /// OR combines for which the commuted variant will be tried as well. static SDValue visitORCommutative( SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) { @@ -6935,6 +7104,10 @@ if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1)) return BSwap; + // Simplify halfword bswaps in 64bit. + if (SDValue Combined = tryCombineHWordByteSwaps(N)) + return Combined; + // reassociate or if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags())) return ROR; Index: llvm/test/CodeGen/AArch64/arm64-rev.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-rev.ll +++ llvm/test/CodeGen/AArch64/arm64-rev.ll @@ -707,21 +707,7 @@ define i64 @test_rev16_x_hwbyteswaps_complex1(i64 %a) nounwind { ; CHECK-LABEL: test_rev16_x_hwbyteswaps_complex1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: lsr x8, x0, #48 -; CHECK-NEXT: lsr x9, x0, #8 -; CHECK-NEXT: lsr x10, x0, #32 -; CHECK-NEXT: and x11, x9, #0xff000000000000 -; CHECK-NEXT: lsr x12, x0, #16 -; CHECK-NEXT: bfi x11, x8, #56, #8 -; CHECK-NEXT: and x8, x9, #0xff00000000 -; CHECK-NEXT: orr x8, x11, x8 -; CHECK-NEXT: and x9, x9, #0xff0000 -; CHECK-NEXT: bfi x8, x10, #40, #8 -; CHECK-NEXT: orr x8, x8, x9 -; CHECK-NEXT: ubfiz x9, x0, #8, #8 -; CHECK-NEXT: bfi x8, x12, #24, #8 -; CHECK-NEXT: bfxil x8, x0, #8, #8 -; CHECK-NEXT: orr x0, x8, x9 +; CHECK-NEXT: rev16 x0, x0 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_rev16_x_hwbyteswaps_complex1: @@ -768,16 +754,7 @@ define i64 @test_rev16_x_hwbyteswaps_complex2(i64 %a) nounwind { ; CHECK-LABEL: test_rev16_x_hwbyteswaps_complex2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: lsr x9, x0, #48 -; CHECK-NEXT: lsr x10, x0, #32 -; CHECK-NEXT: lsr x8, x0, #8 -; CHECK-NEXT: lsr x11, x0, #16 -; CHECK-NEXT: and x8, x8, #0xff00ff00ff00ff -; CHECK-NEXT: bfi x8, x9, #56, #8 -; CHECK-NEXT: bfi x8, x10, #40, #8 -; CHECK-NEXT: bfi x8, x11, #24, #8 -; CHECK-NEXT: bfi x8, x0, #8, #8 -; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: rev16 x0, x0 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_rev16_x_hwbyteswaps_complex2: @@ -825,21 +802,7 @@ define i64 @test_rev16_x_hwbyteswaps_complex3(i64 %a) nounwind { ; CHECK-LABEL: test_rev16_x_hwbyteswaps_complex3: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: lsr x8, x0, #48 -; CHECK-NEXT: lsr x9, x0, #8 -; CHECK-NEXT: lsr x10, x0, #32 -; CHECK-NEXT: and x11, x9, #0xff000000000000 -; CHECK-NEXT: lsr x12, x0, #16 -; CHECK-NEXT: bfi x11, x8, #56, #8 -; CHECK-NEXT: and x8, x9, #0xff00000000 -; CHECK-NEXT: orr x8, x8, x11 -; CHECK-NEXT: and x9, x9, #0xff0000 -; CHECK-NEXT: bfi x8, x10, #40, #8 -; CHECK-NEXT: orr x8, x9, x8 -; CHECK-NEXT: ubfiz x9, x0, #8, #8 -; CHECK-NEXT: bfi x8, x12, #24, #8 -; CHECK-NEXT: bfxil x8, x0, #8, #8 -; CHECK-NEXT: orr x0, x9, x8 +; CHECK-NEXT: rev16 x0, x0 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_rev16_x_hwbyteswaps_complex3: