Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -581,6 +581,7 @@
     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                bool DemandHighBits = true);
     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
+    SDValue tryCombineHWordByteSwaps(SDNode *N);
     SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                               SDValue InnerPos, SDValue InnerNeg, bool HasPos,
                               unsigned PosOpcode, unsigned NegOpcode,
@@ -6782,6 +6783,97 @@
   return SDValue();
 }
 
+/// Merge two constant-masked ANDs of the same value that are reachable from
+/// one i64 OR into a single AND whose mask is the union of both masks:
+///   or(or(and(x, C1), y), and(x, C2))
+///     -> or(and(x, C1 | C2), y)
+///   or(or(or(and(x, C1), y), z), and(x, C2))
+///     -> or(or(and(x, C1 | C2), y), z)
+/// Every OR is matched with its operands in either order, and the rebuilt ORs
+/// keep the operand order of the ORs they replace.
+///
+/// \param N the ISD::OR node to simplify.
+/// \returns the replacement value, or an empty SDValue when N does not match.
+///
+/// NOTE(review): none of the matched nodes is checked with hasOneUse();
+/// confirm that rebuilding the tree is wanted when they have other users.
+SDValue DAGCombiner::tryCombineHWordByteSwaps(SDNode *N) {
+  // Bail out unless LegalOperations is set.
+  if (!LegalOperations)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i64 || N->getOpcode() != ISD::OR)
+    return SDValue();
+
+  // The root must pair an OR with an AND, in either operand order.
+  SDValue OrTree = N->getOperand(0);
+  SDValue OuterAnd = N->getOperand(1);
+  if (OrTree.getOpcode() == ISD::AND && OuterAnd.getOpcode() == ISD::OR) {
+    OrTree = N->getOperand(1);
+    OuterAnd = N->getOperand(0);
+  }
+  if (OrTree.getOpcode() != ISD::OR || OuterAnd.getOpcode() != ISD::AND)
+    return SDValue();
+
+  // Operand index (0 or 1) of Or that is an AND of the same value as
+  // OuterAnd, or -1 if neither operand is. Operand 0 is tried first.
+  SDValue Src = OuterAnd.getOperand(0);
+  auto FindAndOfSrc = [&Src](SDValue Or) -> int {
+    for (int Idx = 0; Idx != 2; ++Idx) {
+      SDValue Op = Or.getOperand(Idx);
+      if (Op.getOpcode() == ISD::AND && Op.getOperand(0) == Src)
+        return Idx;
+    }
+    return -1;
+  };
+
+  // Try the two-level form first. Failing that, descend into whichever
+  // operand of OrTree is itself an OR (operand 0 preferred) for the
+  // three-level form.
+  SDValue InnerOr = OrTree;
+  int NestedIdx = -1; // Index of the nested OR in OrTree; -1 for two levels.
+  int AndIdx = FindAndOfSrc(InnerOr);
+  if (AndIdx < 0) {
+    if (OrTree.getOperand(0).getOpcode() == ISD::OR)
+      NestedIdx = 0;
+    else if (OrTree.getOperand(1).getOpcode() == ISD::OR)
+      NestedIdx = 1;
+    else
+      return SDValue();
+
+    InnerOr = OrTree.getOperand(NestedIdx);
+    AndIdx = FindAndOfSrc(InnerOr);
+    if (AndIdx < 0)
+      return SDValue();
+  }
+
+  // Both masks must be constants so that they can be merged here.
+  auto *OuterMask = dyn_cast<ConstantSDNode>(OuterAnd.getOperand(1));
+  auto *InnerMask =
+      dyn_cast<ConstantSDNode>(InnerOr.getOperand(AndIdx).getOperand(1));
+  if (!OuterMask || !InnerMask)
+    return SDValue();
+
+  SDLoc DL(N);
+  uint64_t Mask = OuterMask->getZExtValue() | InnerMask->getZExtValue();
+  SDValue MergedAnd =
+      DAG.getNode(ISD::AND, DL, VT, Src, DAG.getConstant(Mask, DL, VT));
+
+  // Rebuild the inner OR, keeping the merged AND in the matched AND's slot.
+  SDValue Other = InnerOr.getOperand(1 - AndIdx);
+  SDValue Result = AndIdx == 0
+                       ? DAG.getNode(ISD::OR, DL, VT, MergedAnd, Other)
+                       : DAG.getNode(ISD::OR, DL, VT, Other, MergedAnd);
+
+  // Three-level form: put the nested OR's sibling back on its original side.
+  if (NestedIdx == 0)
+    Result = DAG.getNode(ISD::OR, DL, VT, Result, OrTree.getOperand(1));
+  else if (NestedIdx == 1)
+    Result = DAG.getNode(ISD::OR, DL, VT, OrTree.getOperand(0), Result);
+
+  return Result;
+}
+
 /// OR combines for which the commuted variant will be tried as well.
 static SDValue visitORCommutative(
     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
@@ -6944,6 +7036,10 @@
   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
     return BSwap;
 
+  // Simplify halfword bswaps in 64bit.
+  if (SDValue Combined = tryCombineHWordByteSwaps(N))
+    return Combined;
+
   // reassociate or
   if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
     return ROR;
Index: llvm/test/CodeGen/AArch64/arm64-rev.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -707,21 +707,7 @@
 define i64 @test_rev16_x_hwbyteswaps_complex1(i64 %a) nounwind {
 ; CHECK-LABEL: test_rev16_x_hwbyteswaps_complex1:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: lsr x8, x0, #48
-; CHECK-NEXT: lsr x9, x0, #8
-; CHECK-NEXT: lsr x10, x0, #32
-; CHECK-NEXT: and x11, x9, #0xff000000000000
-; CHECK-NEXT: lsr x12, x0, #16
-; CHECK-NEXT: bfi x11, x8, #56, #8
-; CHECK-NEXT: and x8, x9, #0xff00000000
-; CHECK-NEXT: orr x8, x11, x8
-; CHECK-NEXT: and x9, x9, #0xff0000
-; CHECK-NEXT: bfi x8, x10, #40, #8
-; CHECK-NEXT: orr x8, x8, x9
-; CHECK-NEXT: ubfiz x9, x0, #8, #8
-; CHECK-NEXT: bfi x8, x12, #24, #8
-; CHECK-NEXT: bfxil x8, x0, #8, #8
-; CHECK-NEXT: orr x0, x8, x9
+; CHECK-NEXT: rev16 x0, x0
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_rev16_x_hwbyteswaps_complex1:
@@ -768,16 +754,7 @@
 define i64 @test_rev16_x_hwbyteswaps_complex2(i64 %a) nounwind {
 ; CHECK-LABEL: test_rev16_x_hwbyteswaps_complex2:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: lsr x9, x0, #48
-; CHECK-NEXT: lsr x10, x0, #32
-; CHECK-NEXT: lsr x8, x0, #8
-; CHECK-NEXT: lsr x11, x0, #16
-; CHECK-NEXT: and x8, x8, #0xff00ff00ff00ff
-; CHECK-NEXT: bfi x8, x9, #56, #8
-; CHECK-NEXT: bfi x8, x10, #40, #8
-; CHECK-NEXT: bfi x8, x11, #24, #8
-; CHECK-NEXT: bfi x8, x0, #8, #8
-; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: rev16 x0, x0
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_rev16_x_hwbyteswaps_complex2:
@@ -825,21 +802,7 @@
 define i64 @test_rev16_x_hwbyteswaps_complex3(i64 %a) nounwind {
 ; CHECK-LABEL: test_rev16_x_hwbyteswaps_complex3:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: lsr x8, x0, #48
-; CHECK-NEXT: lsr x9, x0, #8
-; CHECK-NEXT: lsr x10, x0, #32
-; CHECK-NEXT: and x11, x9, #0xff000000000000
-; CHECK-NEXT: lsr x12, x0, #16
-; CHECK-NEXT: bfi x11, x8, #56, #8
-; CHECK-NEXT: and x8, x9, #0xff00000000
-; CHECK-NEXT: orr x8, x8, x11
-; CHECK-NEXT: and x9, x9, #0xff0000
-; CHECK-NEXT: bfi x8, x10, #40, #8
-; CHECK-NEXT: orr x8, x9, x8
-; CHECK-NEXT: ubfiz x9, x0, #8, #8
-; CHECK-NEXT: bfi x8, x12, #24, #8
-; CHECK-NEXT: bfxil x8, x0, #8, #8
-; CHECK-NEXT: orr x0, x9, x8
+; CHECK-NEXT: rev16 x0, x0
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_rev16_x_hwbyteswaps_complex3:
@@ -888,10 +851,8 @@
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: lsr x8, x0, #24
 ; CHECK-NEXT: lsr x9, x0, #8
-; CHECK-NEXT: and x10, x9, #0xff000000000000
-; CHECK-NEXT: bfi x10, x8, #32, #8
-; CHECK-NEXT: and x8, x9, #0xff0000
-; CHECK-NEXT: orr x0, x10, x8
+; CHECK-NEXT: and x0, x9, #0xff000000ff0000
+; CHECK-NEXT: bfi x0, x8, #32, #8
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_or_and_combine1:
@@ -919,13 +880,11 @@
 ; CHECK-LABEL: test_or_and_combine2:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: lsr x8, x0, #8
-; CHECK-NEXT: lsl x10, x0, #8
-; CHECK-NEXT: and x9, x8, #0xff000000000000
-; CHECK-NEXT: and x8, x8, #0xff0000
-; CHECK-NEXT: orr x9, x9, x10
-; CHECK-NEXT: and x10, x10, #0xff00000000
-; CHECK-NEXT: orr x9, x9, x10
-; CHECK-NEXT: orr x0, x9, x8
+; CHECK-NEXT: lsl x9, x0, #8
+; CHECK-NEXT: and x8, x8, #0xff000000ff0000
+; CHECK-NEXT: and x10, x9, #0xff00000000
+; CHECK-NEXT: orr x8, x8, x9
+; CHECK-NEXT: orr x0, x8, x10
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_or_and_combine2: