Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5647,6 +5647,43 @@ return false; } +// Match this pattern: +// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff)) +// And rewrite this to: +// (rotr (bswap A), 16) +static SDValue MatchBSwapHWordOrAndAnd(const TargetLowering &TLI, + SelectionDAG &DAG, SDNode *N, SDValue N0, + SDValue N1, EVT VT, EVT ShiftAmountTy) { + if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) + return SDValue(); + if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND) + return SDValue(); + ConstantSDNode *Mask1 = dyn_cast(N0.getOperand(1)); + ConstantSDNode *Mask2 = dyn_cast(N1.getOperand(1)); + if (!Mask1 || !Mask2) + return SDValue(); + if (Mask1->getAPIntValue() != 0xff00ff00 || + Mask2->getAPIntValue() != 0x00ff00ff) + return SDValue(); + SDValue Shift1 = N0.getOperand(0); + SDValue Shift2 = N1.getOperand(0); + if (Shift1.getOpcode() != ISD::SHL || Shift2.getOpcode() != ISD::SRL) + return SDValue(); + ConstantSDNode *Const1 = dyn_cast(Shift1.getOperand(1)); + ConstantSDNode *Const2 = dyn_cast(Shift2.getOperand(1)); + if (!Const1 || !Const2) + return SDValue(); + if (Const1->getAPIntValue() != 8 || Const2->getAPIntValue() != 8) + return SDValue(); + if (Shift1.getOperand(0) != Shift2.getOperand(0)) + return SDValue(); + + SDLoc DL(N); + SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift1.getOperand(0)); + SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy); + return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt); +} + /// Match a 32-bit packed halfword bswap. That is /// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | @@ -5663,6 +5700,10 @@ if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); + if (SDValue BSwap = MatchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT, + getShiftAmountTy(VT))) + return BSwap; + // Look for either // (or (bswaphpair), (bswaphpair)) // (or (or (bswaphpair), (and)), (and)) Index: llvm/test/CodeGen/Thumb2/thumb2-rev16.ll =================================================================== --- llvm/test/CodeGen/Thumb2/thumb2-rev16.ll +++ llvm/test/CodeGen/Thumb2/thumb2-rev16.ll @@ -1,11 +1,13 @@ -; XFAIL: * -; fixme rev16 pattern is not matching - -; RUN: llc < %s -mtriple=thumb-- -mcpu=arm1156t2-s -mattr=+thumb2 | grep "rev16\W*r[0-9]*,\W*r[0-9]*" | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=thumbv7m-none-eabi -o - | FileCheck %s ; 0xff00ff00 = 4278255360 ; 0x00ff00ff = 16711935 define i32 @f1(i32 %a) { +; CHECK-LABEL: f1: +; CHECK: @ %bb.0: +; CHECK-NEXT: rev16 r0, r0 +; CHECK-NEXT: bx lr %l8 = shl i32 %a, 8 %r8 = lshr i32 %a, 8 %mask_l8 = and i32 %l8, 4278255360 @@ -14,11 +16,23 @@ ret i32 %tmp } +; FIXME: this rev16 pattern is not matching + ; 0xff000000 = 4278190080 ; 0x00ff0000 = 16711680 ; 0x0000ff00 = 65280 ; 0x000000ff = 255 define i32 @f2(i32 %a) { +; CHECK-LABEL: f2: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov.w r1, #16711680 +; CHECK-NEXT: and r2, r0, #16711680 +; CHECK-NEXT: and.w r1, r1, r0, lsr #8 +; CHECK-NEXT: orr.w r1, r1, r2, lsl #8 +; CHECK-NEXT: ubfx r2, r0, #8, #8 +; CHECK-NEXT: bfi r2, r0, #8, #8 +; CHECK-NEXT: adds r0, r2, r1 +; CHECK-NEXT: bx lr %l8 = shl i32 %a, 8 %r8 = lshr i32 %a, 8 %masklo_l8 = and i32 %l8, 65280