diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9981,6 +9981,42 @@
                         : DAG.getZExtOrTrunc(Result, DL, WideVT));
 }
 
+// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
+// This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
+static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
+  unsigned Opcode = N->getOpcode();
+  if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+  if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
+    SDValue OldLHS = N0.getOperand(0);
+    SDValue OldRHS = N0.getOperand(1);
+
+    // If both operands are bswap/bitreverse, ignore their multiple uses;
+    // otherwise, logic_op and bswap/bitreverse(x) must each have one use.
+    if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
+      return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
+                         OldRHS.getOperand(0));
+    }
+
+    if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
+      SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
+      return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
+                         NewBitReorder);
+    }
+
+    if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
+      SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
+      return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
+                         OldRHS.getOperand(0));
+    }
+  }
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitSRA(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -10766,6 +10802,9 @@
     }
   }
 
+  if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
+    return V;
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/ARM/combine-bswap.ll b/llvm/test/CodeGen/ARM/combine-bswap.ll
--- a/llvm/test/CodeGen/ARM/combine-bswap.ll
+++ b/llvm/test/CodeGen/ARM/combine-bswap.ll
@@ -8,9 +8,8 @@
 define i32 @bs_and_lhs_bs32(i32 %a, i32 %b) #0 {
 ; CHECK-LABEL: bs_and_lhs_bs32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    rev r0, r0
+; CHECK-NEXT:    rev r1, r1
 ; CHECK-NEXT:    ands r0, r1
-; CHECK-NEXT:    rev r0, r0
 ; CHECK-NEXT:    bx lr
   %1 = tail call i32 @llvm.bswap.i32(i32 %a)
   %2 = and i32 %1, %b
@@ -21,12 +20,10 @@
 define i64 @bs_or_rhs_bs64(i64 %a, i64 %b) #0 {
 ; CHECK-LABEL: bs_or_rhs_bs64:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    rev r2, r2
-; CHECK-NEXT:    orrs r1, r2
-; CHECK-NEXT:    rev r2, r1
-; CHECK-NEXT:    rev r1, r3
-; CHECK-NEXT:    orrs r0, r1
-; CHECK-NEXT:    rev r1, r0
+; CHECK-NEXT:    rev r1, r1
+; CHECK-NEXT:    rev r0, r0
+; CHECK-NEXT:    orrs r2, r1
+; CHECK-NEXT:    orr.w r1, r0, r3
 ; CHECK-NEXT:    mov r0, r2
 ; CHECK-NEXT:    bx lr
   %1 = tail call i64 @llvm.bswap.i64(i64 %b)
@@ -38,10 +35,9 @@
 define i32 @bs_and_all_operand_multiuse(i32 %a, i32 %b) #0 {
 ; CHECK-LABEL: bs_and_all_operand_multiuse:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    rev r1, r1
-; CHECK-NEXT:    rev r0, r0
 ; CHECK-NEXT:    and.w r2, r0, r1
-; CHECK-NEXT:    rev r2, r2
+; CHECK-NEXT:    rev r0, r0
+; CHECK-NEXT:    rev r1, r1
 ; CHECK-NEXT:    muls r0, r2, r0
 ; CHECK-NEXT:    muls r0, r1, r0
 ; CHECK-NEXT:    bx lr
diff --git a/llvm/test/CodeGen/X86/combine-bswap.ll b/llvm/test/CodeGen/X86/combine-bswap.ll
--- a/llvm/test/CodeGen/X86/combine-bswap.ll
+++ b/llvm/test/CodeGen/X86/combine-bswap.ll
@@ -261,15 +261,13 @@
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    bswapl %eax
 ; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bswapl %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: bs_and_lhs_bs32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    bswapl %eax
-; X64-NEXT:    andl %esi, %eax
+; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:    bswapl %eax
+; X64-NEXT:    andl %edi, %eax
 ; X64-NEXT:    retq
   %1 = tail call i32 @llvm.bswap.i32(i32 %a)
   %2 = and i32 %1, %b
@@ -280,22 +278,19 @@
 define i64 @bs_or_lhs_bs64(i64 %a, i64 %b) #0 {
 ; X86-LABEL: bs_or_lhs_bs64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    bswapl %eax
-; X86-NEXT:    bswapl %edx
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bswapl %eax
 ; X86-NEXT:    bswapl %edx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: bs_or_lhs_bs64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    bswapq %rax
-; X64-NEXT:    orq %rsi, %rax
+; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    bswapq %rax
+; X64-NEXT:    orq %rdi, %rax
 ; X64-NEXT:    retq
   %1 = tail call i64 @llvm.bswap.i64(i64 %a)
   %2 = or i64 %1, %b
@@ -306,22 +301,19 @@
 define i64 @bs_xor_rhs_bs64(i64 %a, i64 %b) #0 {
 ; X86-LABEL: bs_xor_rhs_bs64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    bswapl %eax
-; X86-NEXT:    bswapl %edx
-; X86-NEXT:    xorl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bswapl %eax
 ; X86-NEXT:    bswapl %edx
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: bs_xor_rhs_bs64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    bswapq %rax
-; X64-NEXT:    xorq %rdi, %rax
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    bswapq %rax
+; X64-NEXT:    xorq %rsi, %rax
 ; X64-NEXT:    retq
   %1 = tail call i64 @llvm.bswap.i64(i64 %b)
   %2 = xor i64 %a, %1
@@ -332,25 +324,23 @@
 define i32 @bs_and_all_operand_multiuse(i32 %a, i32 %b) #0 {
 ; X86-LABEL: bs_and_all_operand_multiuse:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    bswapl %ecx
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    andl %eax, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
 ; X86-NEXT:    bswapl %edx
-; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    andl %ecx, %eax
+; X86-NEXT:    bswapl %ecx
 ; X86-NEXT:    imull %edx, %eax
+; X86-NEXT:    imull %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: bs_and_all_operand_multiuse:
 ; X64:       # %bb.0:
-; X64-NEXT:    bswapl %edi
-; X64-NEXT:    bswapl %esi
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andl %esi, %eax
 ; X64-NEXT:    bswapl %eax
-; X64-NEXT:    imull %edi, %esi
+; X64-NEXT:    andl %esi, %edi
+; X64-NEXT:    bswapl %esi
+; X64-NEXT:    imull %edi, %eax
 ; X64-NEXT:    imull %esi, %eax
 ; X64-NEXT:    retq
   %1 = tail call i32 @llvm.bswap.i32(i32 %a)
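
For reference, the shape the new combine targets can be written as a small IR example (a sketch only: the fold itself runs on the SelectionDAG during DAG combine, and @example is a hypothetical function, not part of this patch):

    declare i32 @llvm.bswap.i32(i32)

    ; Before: bswap of a logic op whose LHS is itself a bswap,
    ; i.e. bswap(and(bswap(x), y)).
    define i32 @example(i32 %x, i32 %y) {
      %1 = call i32 @llvm.bswap.i32(i32 %x)
      %2 = and i32 %1, %y
      %3 = call i32 @llvm.bswap.i32(i32 %2)
      ret i32 %3
    }

    ; After the fold, this becomes and(x, bswap(y)): the two bswaps of %x
    ; cancel, so only %y is byte-swapped. In DAG terms:
    ;   %1 = call i32 @llvm.bswap.i32(i32 %y)
    ;   %2 = and i32 %x, %1
    ;   ret i32 %2

This is exactly the pattern exercised by bs_and_lhs_bs32 above, where the X64 output drops from two bswapl instructions to one.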