diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5648,6 +5648,48 @@
   return false;
 }
 
+// Match this pattern:
+// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
+// And rewrite this to:
+// (rotr (bswap A), 16)
+static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
+                                       SelectionDAG &DAG, SDNode *N, SDValue N0,
+                                       SDValue N1, EVT VT, EVT ShiftAmountTy) {
+  assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
+         "MatchBSwapHWordOrAndAnd: expecting i32");
+  if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+    return SDValue();
+  if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
+    return SDValue();
+  // TODO: this is too restrictive; lifting this restriction requires more tests
+  if (!N0->hasOneUse() || !N1->hasOneUse())
+    return SDValue();
+  ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
+  ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
+  if (!Mask0 || !Mask1)
+    return SDValue();
+  if (Mask0->getAPIntValue() != 0xff00ff00 ||
+      Mask1->getAPIntValue() != 0x00ff00ff)
+    return SDValue();
+  SDValue Shift0 = N0.getOperand(0);
+  SDValue Shift1 = N1.getOperand(0);
+  if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
+    return SDValue();
+  ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
+  ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
+  if (!ShiftAmt0 || !ShiftAmt1)
+    return SDValue();
+  if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
+    return SDValue();
+  if (Shift0.getOperand(0) != Shift1.getOperand(0))
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
+  SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
+  return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
+}
+
 /// Match a 32-bit packed halfword bswap. That is
 /// ((x & 0x000000ff) << 8) |
 /// ((x & 0x0000ff00) >> 8) |
@@ -5664,6 +5706,16 @@
   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
     return SDValue();
 
+  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
+                                              getShiftAmountTy(VT)))
+    return BSwap;
+
+  // Try again with commuted operands.
+  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
+                                              getShiftAmountTy(VT)))
+    return BSwap;
+
+  // Look for either
   // (or (bswaphpair), (bswaphpair))
   // (or (or (bswaphpair), (and)), (and))
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-rev16.ll b/llvm/test/CodeGen/Thumb2/thumb2-rev16.ll
--- a/llvm/test/CodeGen/Thumb2/thumb2-rev16.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-rev16.ll
@@ -1,24 +1,157 @@
-; XFAIL: *
-; fixme rev16 pattern is not matching
-
-; RUN: llc < %s -mtriple=thumb-- -mcpu=arm1156t2-s -mattr=+thumb2 | grep "rev16\W*r[0-9]*,\W*r[0-9]*" | count 1
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi -o - | FileCheck %s
 
 ; 0xff00ff00 = 4278255360
 ; 0x00ff00ff = 16711935
-define i32 @f1(i32 %a) {
+define i32 @rev16(i32 %a) {
+; CHECK-LABEL: rev16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    rev16 r0, r0
+; CHECK-NEXT:    bx lr
+  %l8 = shl i32 %a, 8
+  %r8 = lshr i32 %a, 8
+  %mask_l8 = and i32 %l8, 4278255360
+  %mask_r8 = and i32 %r8, 16711935
+  %tmp = or i32 %mask_l8, %mask_r8
+  ret i32 %tmp
+}
+
+define i32 @not_rev16(i32 %a) {
+; CHECK-LABEL: not_rev16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov.w r1, #65280
+; CHECK-NEXT:    and.w r1, r1, r0, lsr #8
+; CHECK-NEXT:    and r0, r0, #65280
+; CHECK-NEXT:    orr.w r0, r1, r0, lsl #8
+; CHECK-NEXT:    bx lr
+  %l8 = shl i32 %a, 8
+  %r8 = lshr i32 %a, 8
+  %mask_r8 = and i32 %r8, 4278255360
+  %mask_l8 = and i32 %l8, 16711935
+  %tmp = or i32 %mask_r8, %mask_l8
+  ret i32 %tmp
+}
+
+define i32 @extra_maskop_uses2(i32 %a) {
+; CHECK-LABEL: extra_maskop_uses2:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov.w r1, #-16711936
+; CHECK-NEXT:    mov.w r2, #16711935
+; CHECK-NEXT:    and.w r1, r1, r0, lsl #8
+; CHECK-NEXT:    and.w r0, r2, r0, lsr #8
+; CHECK-NEXT:    adds r2, r0, r1
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    muls r0, r2, r0
+; CHECK-NEXT:    bx lr
+  %l8 = shl i32 %a, 8
+  %r8 = lshr i32 %a, 8
+  %mask_l8 = and i32 %l8, 4278255360
+  %mask_r8 = and i32 %r8, 16711935
+  %or = or i32 %mask_r8, %mask_l8
+  %mul = mul i32 %mask_r8, %mask_l8 ; another use of the mask ops
+  %r = mul i32 %mul, %or ; and use that result
+  ret i32 %r
+}
+
+
+define i32 @bswap_ror_commuted(i32 %a) {
+; CHECK-LABEL: bswap_ror_commuted:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    rev16 r0, r0
+; CHECK-NEXT:    bx lr
+  %l8 = shl i32 %a, 8
+  %r8 = lshr i32 %a, 8
+  %mask_l8 = and i32 %l8, 4278255360
+  %mask_r8 = and i32 %r8, 16711935
+  %tmp = or i32 %mask_r8, %mask_l8
+  ret i32 %tmp
+}
+
+define i32 @different_shift_amount(i32 %a) {
+; CHECK-LABEL: different_shift_amount:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov.w r1, #16711935
+; CHECK-NEXT:    movw r2, #65024
+; CHECK-NEXT:    and.w r1, r1, r0, lsr #8
+; CHECK-NEXT:    movt r2, #65280
+; CHECK-NEXT:    and.w r0, r2, r0, lsl #9
+; CHECK-NEXT:    add r0, r1
+; CHECK-NEXT:    bx lr
+  %l8 = shl i32 %a, 9
+  %r8 = lshr i32 %a, 8
+  %mask_l8 = and i32 %l8, 4278255360
+  %mask_r8 = and i32 %r8, 16711935
+  %tmp = or i32 %mask_l8, %mask_r8
+  ret i32 %tmp
+}
+
+define i32 @different_constant(i32 %a) {
+; CHECK-LABEL: different_constant:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov.w r1, #16711935
+; CHECK-NEXT:    and.w r0, r1, r0, lsr #8
+; CHECK-NEXT:    bx lr
   %l8 = shl i32 %a, 8
   %r8 = lshr i32 %a, 8
+  %mask_l8 = and i32 %l8, 42
+  %mask_r8 = and i32 %r8, 16711935
+  %tmp = or i32 %mask_l8, %mask_r8
+  ret i32 %tmp
+}
+
+define i32 @different_op(i32 %a) {
+; CHECK-LABEL: different_op:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov.w r1, #16711935
+; CHECK-NEXT:    movw r2, #256
+; CHECK-NEXT:    and.w r1, r1, r0, lsr #8
+; CHECK-NEXT:    movt r2, #255
+; CHECK-NEXT:    add.w r0, r2, r0, lsl #8
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    bx lr
+  %l8 = shl i32 %a, 8
+  %r8 = lshr i32 %a, 8
+  %mask_l8 = sub i32 %l8, 4278255360
+  %mask_r8 = and i32 %r8, 16711935
+  %tmp = or i32 %mask_l8, %mask_r8
+  ret i32 %tmp
+}
+
+define i32 @different_vars(i32 %a, i32 %b) {
+; CHECK-LABEL: different_vars:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov.w r2, #16711935
+; CHECK-NEXT:    and.w r1, r2, r1, lsr #8
+; CHECK-NEXT:    mov.w r2, #-16711936
+; CHECK-NEXT:    and.w r0, r2, r0, lsl #8
+; CHECK-NEXT:    add r0, r1
+; CHECK-NEXT:    bx lr
+  %l8 = shl i32 %a, 8
+  %r8 = lshr i32 %b, 8
   %mask_l8 = and i32 %l8, 4278255360
   %mask_r8 = and i32 %r8, 16711935
   %tmp = or i32 %mask_l8, %mask_r8
   ret i32 %tmp
 }
+
+; FIXME: this rev16 pattern is not matching
+
 ; 0xff000000 = 4278190080
 ; 0x00ff0000 = 16711680
 ; 0x0000ff00 = 65280
 ; 0x000000ff = 255
 define i32 @f2(i32 %a) {
+; CHECK-LABEL: f2:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov.w r1, #16711680
+; CHECK-NEXT:    and r2, r0, #16711680
+; CHECK-NEXT:    and.w r1, r1, r0, lsr #8
+; CHECK-NEXT:    orr.w r1, r1, r2, lsl #8
+; CHECK-NEXT:    ubfx r2, r0, #8, #8
+; CHECK-NEXT:    bfi r2, r0, #8, #8
+; CHECK-NEXT:    adds r0, r2, r1
+; CHECK-NEXT:    bx lr
   %l8 = shl i32 %a, 8
   %r8 = lshr i32 %a, 8
   %masklo_l8 = and i32 %l8, 65280
diff --git a/llvm/test/CodeGen/X86/rev16.ll b/llvm/test/CodeGen/X86/rev16.ll
--- a/llvm/test/CodeGen/X86/rev16.ll
+++ b/llvm/test/CodeGen/X86/rev16.ll
@@ -8,23 +8,15 @@
 ; X86-LABEL: rev16:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    shll $8, %ecx
-; X86-NEXT:    shrl $8, %eax
-; X86-NEXT:    andl $-16711936, %ecx # imm = 0xFF00FF00
-; X86-NEXT:    andl $16711935, %eax # imm = 0xFF00FF
-; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    rorl $16, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: rev16:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shll $8, %eax
-; X64-NEXT:    shrl $8, %edi
-; X64-NEXT:    andl $-16711936, %eax # imm = 0xFF00FF00
-; X64-NEXT:    andl $16711935, %edi # imm = 0xFF00FF
-; X64-NEXT:    addl %edi, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    rorl $16, %eax
 ; X64-NEXT:    retq
   %l8 = shl i32 %a, 8
   %r8 = lshr i32 %a, 8
@@ -104,23 +96,15 @@
 ; X86-LABEL: bswap_ror_commuted:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    shll $8, %ecx
-; X86-NEXT:    shrl $8, %eax
-; X86-NEXT:    andl $-16711936, %ecx # imm = 0xFF00FF00
-; X86-NEXT:    andl $16711935, %eax # imm = 0xFF00FF
-; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    rorl $16, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: bswap_ror_commuted:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shll $8, %eax
-; X64-NEXT:    shrl $8, %edi
-; X64-NEXT:    andl $-16711936, %eax # imm = 0xFF00FF00
-; X64-NEXT:    andl $16711935, %edi # imm = 0xFF00FF
-; X64-NEXT:    addl %edi, %eax
+; X64-NEXT:    bswapl %eax
+; X64-NEXT:    rorl $16, %eax
 ; X64-NEXT:    retq
   %l8 = shl i32 %a, 8
   %r8 = lshr i32 %a, 8
@@ -241,8 +225,6 @@
   ret i32 %tmp
 }
 
-; TODO: another pattern that we are currently not matching
-;
 ; 0xff000000 = 4278190080
 ; 0x00ff0000 = 16711680
 ; 0x0000ff00 = 65280