Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3766,7 +3766,7 @@ EVT VT = N->getValueType(0); if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16) return SDValue(); - if (!TLI.isOperationLegal(ISD::BSWAP, VT)) + if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00) @@ -3878,27 +3878,36 @@ if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) return false; - ConstantSDNode *N1C = dyn_cast(N.getOperand(1)); + SDValue N0 = N.getOperand(0); + unsigned Opc0 = N0.getOpcode(); + if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL) + return false; + + ConstantSDNode *N1C = nullptr; + // SHL or SRL: look upstream for AND mask operand + if (Opc == ISD::AND) + N1C = dyn_cast(N.getOperand(1)); + else if (Opc0 == ISD::AND) + N1C = dyn_cast(N0.getOperand(1)); if (!N1C) return false; - unsigned Num; + unsigned mask; switch (N1C->getZExtValue()) { default: return false; - case 0xFF: Num = 0; break; - case 0xFF00: Num = 1; break; - case 0xFF0000: Num = 2; break; - case 0xFF000000: Num = 3; break; + case 0xFF: mask = 0; break; + case 0xFF00: mask = 1; break; + case 0xFF0000: mask = 2; break; + case 0xFF000000: mask = 3; break; } // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). - SDValue N0 = N.getOperand(0); if (Opc == ISD::AND) { - if (Num == 0 || Num == 2) { + if (mask == 0 || mask == 2) { // (x >> 8) & 0xff // (x >> 8) & 0xff0000 - if (N0.getOpcode() != ISD::SRL) + if (Opc0 != ISD::SRL) return false; ConstantSDNode *C = dyn_cast(N0.getOperand(1)); if (!C || C->getZExtValue() != 8) @@ -3906,7 +3915,7 @@ } else { // (x << 8) & 0xff00 // (x << 8) & 0xff000000 - if (N0.getOpcode() != ISD::SHL) + if (Opc0 != ISD::SHL) return false; ConstantSDNode *C = dyn_cast(N0.getOperand(1)); if (!C || C->getZExtValue() != 8) @@ -3915,7 +3924,7 @@ } else if (Opc == ISD::SHL) { // (x & 0xff) << 8 // (x & 0xff0000) << 8 - if (Num != 0 && Num != 2) + if (mask != 0 && mask != 2) return false; ConstantSDNode *C = dyn_cast(N.getOperand(1)); if (!C || C->getZExtValue() != 8) @@ -3923,17 +3932,17 @@ } else { // Opc == ISD::SRL // (x & 0xff00) >> 8 // (x & 0xff000000) >> 8 - if (Num != 1 && Num != 3) + if (mask != 1 && mask != 3) return false; ConstantSDNode *C = dyn_cast(N.getOperand(1)); if (!C || C->getZExtValue() != 8) return false; } - if (Parts[Num]) + if (Parts[mask]) return false; - Parts[Num] = N0.getOperand(0).getNode(); + Parts[mask] = N0.getOperand(0).getNode(); return true; } @@ -3950,7 +3959,7 @@ EVT VT = N->getValueType(0); if (VT != MVT::i32) return SDValue(); - if (!TLI.isOperationLegal(ISD::BSWAP, VT)) + if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); // Look for either @@ -3965,18 +3974,15 @@ if (N1.getOpcode() == ISD::OR && N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { // (or (or (and), (and)), (or (and), (and))) - SDValue N000 = N00.getOperand(0); - if (!isBSwapHWordElement(N000, Parts)) + if (!isBSwapHWordElement(N00, Parts)) return SDValue(); - - SDValue N001 = N00.getOperand(1); - if (!isBSwapHWordElement(N001, Parts)) + if (!isBSwapHWordElement(N01, Parts)) return SDValue(); - SDValue N010 = N01.getOperand(0); - if (!isBSwapHWordElement(N010, Parts)) + SDValue N10 = N1.getOperand(0); + if (!isBSwapHWordElement(N10, Parts)) return SDValue(); - SDValue N011 = N01.getOperand(1); - if (!isBSwapHWordElement(N011, Parts)) + SDValue N11 = N1.getOperand(1); + if (!isBSwapHWordElement(N11, Parts)) return SDValue(); } else { // (or (or (or (and), (and)), (and)), (and)) Index: test/CodeGen/X86/bswap_tree.ll =================================================================== --- test/CodeGen/X86/bswap_tree.ll +++ test/CodeGen/X86/bswap_tree.ll @@ -1,55 +1,38 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Check reconstructing bswap from shifted masks and tree of ORs + ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK64 -; Check reconstructing bswap from shifted masks and tree of ORs - ; Match a 32-bit packed halfword bswap. That is ; ((x & 0x000000ff) << 8) | ; ((x & 0x0000ff00) >> 8) | ; ((x & 0x00ff0000) << 8) | ; ((x & 0xff000000) >> 8) ; => (rotl (bswap x), 16) -define i32 @test1(i32 %x) nounwind { +define i32 @test1(i32 %x) { ; CHECK-LABEL: test1: -; CHECK: # BB#0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movl %ecx, %edx -; CHECK-NEXT: andl $16711680, %edx # imm = 0xFF0000 -; CHECK-NEXT: movl %ecx, %eax -; CHECK-NEXT: andl $-16777216, %eax # imm = 0xFF000000 -; CHECK-NEXT: shll $8, %edx -; CHECK-NEXT: shrl $8, %eax -; CHECK-NEXT: bswapl %ecx -; CHECK-NEXT: shrl $16, %ecx -; CHECK-NEXT: orl %edx, %eax -; CHECK-NEXT: orl %ecx, %eax -; CHECK-NEXT: retl -; +; CHECK: bswapl [[REG:%.*]] +; CHECK-NEXT: roll $16, [[REG]] +; CHECK-NEXT: retl + ; CHECK64-LABEL: test1: -; CHECK64: # BB#0: -; CHECK64-NEXT: # kill: %EDI %EDI %RDI -; CHECK64-NEXT: movl %edi, %eax -; CHECK64-NEXT: andl $16711680, %eax # imm = 0xFF0000 -; CHECK64-NEXT: movl %edi, %ecx -; CHECK64-NEXT: andl $-16777216, %ecx # imm = 0xFF000000 -; CHECK64-NEXT: shll $8, %eax -; CHECK64-NEXT: shrl $8, %ecx -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: shrl $16, %edi -; CHECK64-NEXT: orl %eax, %ecx -; CHECK64-NEXT: leal (%rcx,%rdi), %eax -; CHECK64-NEXT: retq +; CHECK64: bswapl [[REG:%.*]] +; CHECK64-NEXT: roll $16, [[REG]] +; CHECK64: retq + %byte0 = and i32 %x, 255 ; 0x000000ff %byte1 = and i32 %x, 65280 ; 0x0000ff00 %byte2 = and i32 %x, 16711680 ; 0x00ff0000 %byte3 = and i32 %x, 4278190080 ; 0xff000000 + %tmp0 = shl i32 %byte0, 8 %tmp1 = lshr i32 %byte1, 8 %tmp2 = shl i32 %byte2, 8 %tmp3 = lshr i32 %byte3, 8 + %or0 = or i32 %tmp0, %tmp1 %or1 = or i32 %tmp2, %tmp3 + %result = or i32 %or0, %or1 ret i32 %result } @@ -59,47 +42,30 @@ ; ((x >> 8) & 0x000000ff) | ; ((x << 8) & 0xff000000) | ; ((x >> 8) & 0x00ff0000) -define i32 @test2(i32 %x) nounwind { +define i32 @test2(i32 %x) { ; CHECK-LABEL: test2: -; CHECK: # BB#0: -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: shll $8, %ecx -; CHECK-NEXT: shrl $8, %eax -; CHECK-NEXT: movzwl %cx, %edx -; CHECK-NEXT: movzbl %al, %esi -; CHECK-NEXT: andl $-16777216, %ecx # imm = 0xFF000000 -; CHECK-NEXT: andl $16711680, %eax # imm = 0xFF0000 -; CHECK-NEXT: orl %edx, %esi -; CHECK-NEXT: orl %ecx, %eax -; CHECK-NEXT: orl %esi, %eax -; CHECK-NEXT: popl %esi -; CHECK-NEXT: retl -; +; CHECK: bswapl [[REG:%.*]] +; CHECK-NEXT: roll $16, [[REG]] +; CHECK-NEXT: retl + ; CHECK64-LABEL: test2: -; CHECK64: # BB#0: -; CHECK64-NEXT: movl %edi, %ecx -; CHECK64-NEXT: shll $8, %ecx -; CHECK64-NEXT: shrl $8, %edi -; CHECK64-NEXT: movzwl %cx, %edx -; CHECK64-NEXT: movzbl %dil, %eax -; CHECK64-NEXT: andl $-16777216, %ecx # imm = 0xFF000000 -; CHECK64-NEXT: andl $16711680, %edi # imm = 0xFF0000 -; CHECK64-NEXT: orl %edx, %eax -; CHECK64-NEXT: orl %ecx, %edi -; CHECK64-NEXT: orl %edi, %eax -; CHECK64-NEXT: retq +; CHECK64: bswapl [[REG:%.*]] +; CHECK64-NEXT: roll $16, [[REG]] +; CHECK64: retq + %byte1 = shl i32 %x, 8 %byte0 = lshr i32 %x, 8 %byte3 = shl i32 %x, 8 %byte2 = lshr i32 %x, 8 + %tmp1 = and i32 %byte1, 65280 ; 0x0000ff00 %tmp0 = and i32 %byte0, 255 ; 0x000000ff %tmp3 = and i32 %byte3, 4278190080 ; 0xff000000 %tmp2 = and i32 %byte2, 16711680 ; 0x00ff0000 + %or0 = or i32 %tmp0, %tmp1 %or1 = or i32 %tmp2, %tmp3 + %result = or i32 %or0, %or1 ret i32 %result } Index: test/CodeGen/X86/bswap_tree2.ll =================================================================== --- test/CodeGen/X86/bswap_tree2.ll +++ test/CodeGen/X86/bswap_tree2.ll @@ -1,50 +1,44 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Check a few invalid patterns for halfword bswap pattern matching + ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK64 -; Check a few invalid patterns for halfword bswap pattern matching - ; Don't match a near-miss 32-bit packed halfword bswap ; (with only half of the swap tree valid). - define i32 @test1(i32 %x) nounwind { + define i32 @test1(i32 %x) { ; CHECK-LABEL: test1: -; CHECK: # BB#0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movl %ecx, %edx -; CHECK-NEXT: andl $16711680, %edx # imm = 0xFF0000 -; CHECK-NEXT: movl %ecx, %eax -; CHECK-NEXT: orl $-16777216, %eax # imm = 0xFF000000 -; CHECK-NEXT: shll $8, %edx -; CHECK-NEXT: shrl $8, %eax -; CHECK-NEXT: bswapl %ecx -; CHECK-NEXT: shrl $16, %ecx -; CHECK-NEXT: orl %edx, %eax -; CHECK-NEXT: orl %ecx, %eax +; CHECK: orl $-16777216 +; CHECK-NEXT: shll $8 +; CHECK-NEXT: shrl $8 +; CHECK-NEXT: bswapl +; CHECK-NEXT: shrl $16 +; CHECK-NEXT: orl +; CHECK-NEXT: orl ; CHECK-NEXT: retl -; + ; CHECK64-LABEL: test1: -; CHECK64: # BB#0: -; CHECK64-NEXT: movl %edi, %ecx -; CHECK64-NEXT: andl $16711680, %ecx # imm = 0xFF0000 -; CHECK64-NEXT: movl %edi, %eax -; CHECK64-NEXT: orl $-16777216, %eax # imm = 0xFF000000 -; CHECK64-NEXT: shll $8, %ecx -; CHECK64-NEXT: shrl $8, %eax -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: shrl $16, %edi -; CHECK64-NEXT: orl %ecx, %eax -; CHECK64-NEXT: orl %edi, %eax +; CHECK64: orl $-16777216 +; CHECK64-NEXT: shll $8 +; CHECK64-NEXT: shrl $8 +; CHECK64-NEXT: bswapl +; CHECK64-NEXT: shrl $16 +; CHECK64-NEXT: orl +; CHECK64-NEXT: orl ; CHECK64-NEXT: retq + %byte0 = and i32 %x, 255 ; 0x000000ff %byte1 = and i32 %x, 65280 ; 0x0000ff00 %byte2 = and i32 %x, 16711680 ; 0x00ff0000 %byte3 = or i32 %x, 4278190080 ; 0xff000000 + %tmp0 = shl i32 %byte0, 8 %tmp1 = lshr i32 %byte1, 8 %tmp2 = shl i32 %byte2, 8 %tmp3 = lshr i32 %byte3, 8 + %or0 = or i32 %tmp0, %tmp1 %or1 = or i32 %tmp2, %tmp3 + %result = or i32 %or0, %or1 ret i32 %result } @@ -55,96 +49,49 @@ ; ((x << 8) & 0x000000ff) | ; ((x << 8) & 0xff000000) | ; ((x >> 8) & 0x00ff0000) -define i32 @test2(i32 %x) nounwind { +define i32 @test2(i32 %x) { ; CHECK-LABEL: test2: -; CHECK: # BB#0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movl %ecx, %eax -; CHECK-NEXT: shrl $8, %eax -; CHECK-NEXT: shll $8, %ecx -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: andl $65280, %edx # imm = 0xFF00 -; CHECK-NEXT: andl $-16777216, %ecx # imm = 0xFF000000 -; CHECK-NEXT: andl $16711680, %eax # imm = 0xFF0000 -; CHECK-NEXT: orl %ecx, %eax -; CHECK-NEXT: orl %edx, %eax -; CHECK-NEXT: retl -; +; CHECK-NOT: bswap + ; CHECK64-LABEL: test2: -; CHECK64: # BB#0: -; CHECK64-NEXT: movl %edi, %eax -; CHECK64-NEXT: shrl $8, %eax -; CHECK64-NEXT: shll $8, %edi -; CHECK64-NEXT: movl %eax, %ecx -; CHECK64-NEXT: andl $65280, %ecx # imm = 0xFF00 -; CHECK64-NEXT: andl $-16777216, %edi # imm = 0xFF000000 -; CHECK64-NEXT: andl $16711680, %eax # imm = 0xFF0000 -; CHECK64-NEXT: orl %edi, %eax -; CHECK64-NEXT: leal (%rax,%rcx), %eax -; CHECK64-NEXT: retq +; CHECK64-NOT: bswap %byte1 = lshr i32 %x, 8 %byte0 = shl i32 %x, 8 %byte3 = shl i32 %x, 8 %byte2 = lshr i32 %x, 8 + %tmp1 = and i32 %byte1, 65280 ; 0x0000ff00 %tmp0 = and i32 %byte0, 255 ; 0x000000ff %tmp3 = and i32 %byte3, 4278190080 ; 0xff000000 %tmp2 = and i32 %byte2, 16711680 ; 0x00ff0000 + %or0 = or i32 %tmp0, %tmp1 %or1 = or i32 %tmp2, %tmp3 + %result = or i32 %or0, %or1 ret i32 %result } ; Invalid pattern involving a unary op -define i32 @test3(float %x) nounwind { +define i32 @test3(float %x) { ; CHECK-LABEL: test3: -; CHECK: # BB#0: -; CHECK-NEXT: subl $8, %esp -; CHECK-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp) -; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F -; CHECK-NEXT: fldcw {{[0-9]+}}(%esp) -; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) -; CHECK-NEXT: fistpl {{[0-9]+}}(%esp) -; CHECK-NEXT: fldcw {{[0-9]+}}(%esp) -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movl %ecx, %edx -; CHECK-NEXT: shll $8, %edx -; CHECK-NEXT: movl %ecx, %eax -; CHECK-NEXT: shrl $8, %eax -; CHECK-NEXT: andl $65280, %ecx # imm = 0xFF00 -; CHECK-NEXT: andl $-16777216, %edx # imm = 0xFF000000 -; CHECK-NEXT: andl $16711680, %eax # imm = 0xFF0000 -; CHECK-NEXT: orl %edx, %eax -; CHECK-NEXT: orl %ecx, %eax -; CHECK-NEXT: addl $8, %esp -; CHECK-NEXT: retl -; +; CHECK-NOT: bswap + ; CHECK64-LABEL: test3: -; CHECK64: # BB#0: -; CHECK64-NEXT: cvttss2si %xmm0, %ecx -; CHECK64-NEXT: movl %ecx, %edx -; CHECK64-NEXT: shll $8, %edx -; CHECK64-NEXT: movl %ecx, %eax -; CHECK64-NEXT: shrl $8, %eax -; CHECK64-NEXT: andl $65280, %ecx # imm = 0xFF00 -; CHECK64-NEXT: andl $-16777216, %edx # imm = 0xFF000000 -; CHECK64-NEXT: andl $16711680, %eax # imm = 0xFF0000 -; CHECK64-NEXT: orl %edx, %eax -; CHECK64-NEXT: orl %ecx, %eax -; CHECK64-NEXT: retq +; CHECK64-NOT: bswap %integer = fptosi float %x to i32 %byte0 = shl i32 %integer, 8 %byte3 = shl i32 %integer, 8 %byte2 = lshr i32 %integer, 8 + %tmp1 = and i32 %integer, 65280 ; 0x0000ff00 %tmp0 = and i32 %byte0, 255 ; 0x000000ff %tmp3 = and i32 %byte3, 4278190080 ; 0xff000000 %tmp2 = and i32 %byte2, 16711680 ; 0x00ff0000 + %or0 = or i32 %tmp0, %tmp1 %or1 = or i32 %tmp2, %tmp3 + %result = or i32 %or0, %or1 ret i32 %result }