Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5648,6 +5648,59 @@
   return false;
 }
 
+/// Match a 32-bit packed halfword bswap spelled as two masked shifts of the
+/// same value:
+///   (or (and (shl A, 8), 0xff00ff00), (and (srl A, 8), 0x00ff00ff))
+/// and rewrite it to:
+///   (rotr (bswap A), 16)
+/// bswap reverses all four bytes; the rotate by 16 then swaps the halfwords
+/// back, leaving only the bytes within each halfword exchanged.
+///
+/// \p N is the OR being combined. \p N0 must be the shl/0xff00ff00 operand and
+/// \p N1 the srl/0x00ff00ff operand; the operands are not commuted here, so a
+/// caller that wants both orders has to call this twice. \p ShiftAmountTy is
+/// the type of the rotate-amount constant.
+/// \returns the new ROTR node, or an empty SDValue if nothing matched.
+static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
+                                       SelectionDAG &DAG, SDNode *N, SDValue N0,
+                                       SDValue N1, EVT VT, EVT ShiftAmountTy) {
+  assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
+         "MatchBSwapHWordOrAndAnd: expecting i32");
+  if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+    return SDValue();
+  if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
+    return SDValue();
+  // If either masked half has another user it stays live after the rewrite, so
+  // replacing just the OR with bswap+rotr would not remove any work.
+  if (!N0.hasOneUse() || !N1.hasOneUse())
+    return SDValue();
+  ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
+  ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
+  if (!Mask0 || !Mask1)
+    return SDValue();
+  if (Mask0->getAPIntValue() != 0xff00ff00 ||
+      Mask1->getAPIntValue() != 0x00ff00ff)
+    return SDValue();
+  SDValue Shift0 = N0.getOperand(0);
+  SDValue Shift1 = N1.getOperand(0);
+  if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
+    return SDValue();
+  ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
+  ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
+  if (!ShiftAmt0 || !ShiftAmt1)
+    return SDValue();
+  if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
+    return SDValue();
+  // Both halves must be derived from the same source value.
+  if (Shift0.getOperand(0) != Shift1.getOperand(0))
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
+  SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
+  return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
+}
+
 /// Match a 32-bit packed halfword bswap.
That is /// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | @@ -5664,6 +5703,16 @@ if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); + if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT, + getShiftAmountTy(VT))) + return BSwap; + + // Try again with commuted operands. + if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT, + getShiftAmountTy(VT))) + return BSwap; + + // Look for either // (or (bswaphpair), (bswaphpair)) // (or (or (bswaphpair), (and)), (and)) Index: llvm/test/CodeGen/Thumb2/thumb2-rev16.ll =================================================================== --- llvm/test/CodeGen/Thumb2/thumb2-rev16.ll +++ llvm/test/CodeGen/Thumb2/thumb2-rev16.ll @@ -3,23 +3,116 @@ ; 0xff00ff00 = 4278255360 ; 0x00ff00ff = 16711935 -define i32 @f1(i32 %a) { -; CHECK-LABEL: f1: +define i32 @rev16(i32 %a) { +; CHECK-LABEL: rev16: +; CHECK: @ %bb.0: +; CHECK-NEXT: rev16 r0, r0 +; CHECK-NEXT: bx lr + %l8 = shl i32 %a, 8 + %r8 = lshr i32 %a, 8 + %mask_l8 = and i32 %l8, 4278255360 + %mask_r8 = and i32 %r8, 16711935 + %tmp = or i32 %mask_l8, %mask_r8 + ret i32 %tmp +} + +define i32 @not_rev16(i32 %a) { +; CHECK-LABEL: not_rev16: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov.w r1, #65280 +; CHECK-NEXT: and.w r1, r1, r0, lsr #8 +; CHECK-NEXT: and r0, r0, #65280 +; CHECK-NEXT: orr.w r0, r1, r0, lsl #8 +; CHECK-NEXT: bx lr + %l8 = shl i32 %a, 8 + %r8 = lshr i32 %a, 8 + %mask_r8 = and i32 %r8, 4278255360 + %mask_l8 = and i32 %l8, 16711935 + %tmp = or i32 %mask_r8, %mask_l8 + ret i32 %tmp +} + +define i32 @bswap_ror_commuted(i32 %a) { +; CHECK-LABEL: bswap_ror_commuted: +; CHECK: @ %bb.0: +; CHECK-NEXT: rev16 r0, r0 +; CHECK-NEXT: bx lr + %l8 = shl i32 %a, 8 + %r8 = lshr i32 %a, 8 + %mask_l8 = and i32 %l8, 4278255360 + %mask_r8 = and i32 %r8, 16711935 + %tmp = or i32 %mask_r8, %mask_l8 + ret i32 %tmp +} + +define i32 @different_shift_amount(i32 %a) { +; CHECK-LABEL: different_shift_amount: ; CHECK: @ %bb.0: ; CHECK-NEXT: mov.w r1, 
#16711935 -; CHECK-NEXT: mov.w r2, #-16711936 +; CHECK-NEXT: movw r2, #65024 ; CHECK-NEXT: and.w r1, r1, r0, lsr #8 -; CHECK-NEXT: and.w r0, r2, r0, lsl #8 +; CHECK-NEXT: movt r2, #65280 +; CHECK-NEXT: and.w r0, r2, r0, lsl #9 ; CHECK-NEXT: add r0, r1 ; CHECK-NEXT: bx lr + %l8 = shl i32 %a, 9 + %r8 = lshr i32 %a, 8 + %mask_l8 = and i32 %l8, 4278255360 + %mask_r8 = and i32 %r8, 16711935 + %tmp = or i32 %mask_l8, %mask_r8 + ret i32 %tmp +} + +define i32 @different_constant(i32 %a) { +; CHECK-LABEL: different_constant: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov.w r1, #16711935 +; CHECK-NEXT: and.w r0, r1, r0, lsr #8 +; CHECK-NEXT: bx lr %l8 = shl i32 %a, 8 %r8 = lshr i32 %a, 8 + %mask_l8 = and i32 %l8, 42 + %mask_r8 = and i32 %r8, 16711935 + %tmp = or i32 %mask_l8, %mask_r8 + ret i32 %tmp +} + +define i32 @different_op(i32 %a) { +; CHECK-LABEL: different_op: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov.w r1, #16711935 +; CHECK-NEXT: movw r2, #256 +; CHECK-NEXT: and.w r1, r1, r0, lsr #8 +; CHECK-NEXT: movt r2, #255 +; CHECK-NEXT: add.w r0, r2, r0, lsl #8 +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: bx lr + %l8 = shl i32 %a, 8 + %r8 = lshr i32 %a, 8 + %mask_l8 = sub i32 %l8, 4278255360 + %mask_r8 = and i32 %r8, 16711935 + %tmp = or i32 %mask_l8, %mask_r8 + ret i32 %tmp +} + +define i32 @different_vars(i32 %a, i32 %b) { +; CHECK-LABEL: different_vars: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov.w r2, #16711935 +; CHECK-NEXT: and.w r1, r2, r1, lsr #8 +; CHECK-NEXT: mov.w r2, #-16711936 +; CHECK-NEXT: and.w r0, r2, r0, lsl #8 +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: bx lr + %l8 = shl i32 %a, 8 + %r8 = lshr i32 %b, 8 %mask_l8 = and i32 %l8, 4278255360 %mask_r8 = and i32 %r8, 16711935 %tmp = or i32 %mask_l8, %mask_r8 ret i32 %tmp } + ; FIXME: this rev16 pattern is not matching ; 0xff000000 = 4278190080