Patch summary (descriptive text only; patch tools skip everything before the
first "Index:" header):

  DAGCombiner.cpp
    * Both BSWAP guards now use TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)
      instead of TLI.isOperationLegal(ISD::BSWAP, VT).
    * In the helper that fills Parts[] (called as isBSwapHWordElement below):
        - operand 0 of N must itself be an AND, SHL or SRL node;
        - the byte-mask constant is read from N's operand 1 when N is an AND,
          otherwise from operand 1 of an AND found as N's operand 0;
        - the local `Num` is renamed to `Mask`.
    * In the "(or (or (and), (and)), (or (and), (and)))" case the four
      elements tested are N00, N01 and the two operands of N1, rather than
      the operands of N00 and N01.

  bswap_tree.ll
    * Expected output for test1 and test2 becomes bswapl + roll $16.

Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3766,7 +3766,7 @@
   EVT VT = N->getValueType(0);
   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
     return SDValue();
-  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
     return SDValue();
 
   // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
@@ -3878,27 +3878,36 @@
   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
     return false;
 
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+  SDValue N0 = N.getOperand(0);
+  unsigned Opc0 = N0.getOpcode();
+  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
+    return false;
+
+  ConstantSDNode *N1C = nullptr;
+  // SHL or SRL: look upstream for AND mask operand
+  if (Opc == ISD::AND)
+    N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+  else if (Opc0 == ISD::AND)
+    N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   if (!N1C)
     return false;
 
-  unsigned Num;
+  unsigned Mask;
   switch (N1C->getZExtValue()) {
   default:
     return false;
-  case 0xFF:       Num = 0; break;
-  case 0xFF00:     Num = 1; break;
-  case 0xFF0000:   Num = 2; break;
-  case 0xFF000000: Num = 3; break;
+  case 0xFF:       Mask = 0; break;
+  case 0xFF00:     Mask = 1; break;
+  case 0xFF0000:   Mask = 2; break;
+  case 0xFF000000: Mask = 3; break;
   }
 
   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
-  SDValue N0 = N.getOperand(0);
   if (Opc == ISD::AND) {
-    if (Num == 0 || Num == 2) {
+    if (Mask == 0 || Mask == 2) {
       // (x >> 8) & 0xff
       // (x >> 8) & 0xff0000
-      if (N0.getOpcode() != ISD::SRL)
+      if (Opc0 != ISD::SRL)
         return false;
       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
       if (!C || C->getZExtValue() != 8)
@@ -3906,7 +3915,7 @@
     } else {
       // (x << 8) & 0xff00
       // (x << 8) & 0xff000000
-      if (N0.getOpcode() != ISD::SHL)
+      if (Opc0 != ISD::SHL)
         return false;
       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
       if (!C || C->getZExtValue() != 8)
@@ -3915,7 +3924,7 @@
   } else if (Opc == ISD::SHL) {
     // (x & 0xff) << 8
     // (x & 0xff0000) << 8
-    if (Num != 0 && Num != 2)
+    if (Mask != 0 && Mask != 2)
       return false;
     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
     if (!C || C->getZExtValue() != 8)
@@ -3923,17 +3932,17 @@
   } else { // Opc == ISD::SRL
     // (x & 0xff00) >> 8
     // (x & 0xff000000) >> 8
-    if (Num != 1 && Num != 3)
+    if (Mask != 1 && Mask != 3)
       return false;
     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
     if (!C || C->getZExtValue() != 8)
       return false;
   }
 
-  if (Parts[Num])
+  if (Parts[Mask])
     return false;
 
-  Parts[Num] = N0.getOperand(0).getNode();
+  Parts[Mask] = N0.getOperand(0).getNode();
   return true;
 }
 
@@ -3950,7 +3959,7 @@
   EVT VT = N->getValueType(0);
   if (VT != MVT::i32)
     return SDValue();
-  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
     return SDValue();
 
   // Look for either
@@ -3965,18 +3974,15 @@
   if (N1.getOpcode() == ISD::OR &&
       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
     // (or (or (and), (and)), (or (and), (and)))
-    SDValue N000 = N00.getOperand(0);
-    if (!isBSwapHWordElement(N000, Parts))
+    if (!isBSwapHWordElement(N00, Parts))
       return SDValue();
-
-    SDValue N001 = N00.getOperand(1);
-    if (!isBSwapHWordElement(N001, Parts))
+    if (!isBSwapHWordElement(N01, Parts))
       return SDValue();
-    SDValue N010 = N01.getOperand(0);
-    if (!isBSwapHWordElement(N010, Parts))
+    SDValue N10 = N1.getOperand(0);
+    if (!isBSwapHWordElement(N10, Parts))
       return SDValue();
-    SDValue N011 = N01.getOperand(1);
-    if (!isBSwapHWordElement(N011, Parts))
+    SDValue N11 = N1.getOperand(1);
+    if (!isBSwapHWordElement(N11, Parts))
       return SDValue();
   } else {
     // (or (or (or (and), (and)), (and)), (and))
Index: test/CodeGen/X86/bswap_tree.ll
===================================================================
--- test/CodeGen/X86/bswap_tree.ll
+++ test/CodeGen/X86/bswap_tree.ll
@@ -13,32 +13,16 @@
 define i32 @test1(i32 %x) nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $16711680, %edx # imm = 0xFF0000
-; CHECK-NEXT:    movl %ecx, %eax
-; CHECK-NEXT:    andl $-16777216, %eax # imm = 0xFF000000
-; CHECK-NEXT:    shll $8, %edx
-; CHECK-NEXT:    shrl $8, %eax
-; CHECK-NEXT:    bswapl %ecx
-; CHECK-NEXT:    shrl $16, %ecx
-; CHECK-NEXT:    orl %edx, %eax
-; CHECK-NEXT:    orl %ecx, %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    bswapl %eax
+; CHECK-NEXT:    roll $16, %eax
 ; CHECK-NEXT:    retl
 ;
 ; CHECK64-LABEL: test1:
 ; CHECK64:       # BB#0:
-; CHECK64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; CHECK64-NEXT:    movl %edi, %eax
-; CHECK64-NEXT:    andl $16711680, %eax # imm = 0xFF0000
-; CHECK64-NEXT:    movl %edi, %ecx
-; CHECK64-NEXT:    andl $-16777216, %ecx # imm = 0xFF000000
-; CHECK64-NEXT:    shll $8, %eax
-; CHECK64-NEXT:    shrl $8, %ecx
 ; CHECK64-NEXT:    bswapl %edi
-; CHECK64-NEXT:    shrl $16, %edi
-; CHECK64-NEXT:    orl %eax, %ecx
-; CHECK64-NEXT:    leal (%rcx,%rdi), %eax
+; CHECK64-NEXT:    roll $16, %edi
+; CHECK64-NEXT:    movl %edi, %eax
 ; CHECK64-NEXT:    retq
   %byte0 = and i32 %x, 255        ; 0x000000ff
   %byte1 = and i32 %x, 65280      ; 0x0000ff00
@@ -62,33 +46,16 @@
 define i32 @test2(i32 %x) nounwind {
 ; CHECK-LABEL: test2:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    pushl %esi
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    shll $8, %ecx
-; CHECK-NEXT:    shrl $8, %eax
-; CHECK-NEXT:    movzwl %cx, %edx
-; CHECK-NEXT:    movzbl %al, %esi
-; CHECK-NEXT:    andl $-16777216, %ecx # imm = 0xFF000000
-; CHECK-NEXT:    andl $16711680, %eax # imm = 0xFF0000
-; CHECK-NEXT:    orl %edx, %esi
-; CHECK-NEXT:    orl %ecx, %eax
-; CHECK-NEXT:    orl %esi, %eax
-; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    bswapl %eax
+; CHECK-NEXT:    roll $16, %eax
 ; CHECK-NEXT:    retl
 ;
 ; CHECK64-LABEL: test2:
 ; CHECK64:       # BB#0:
-; CHECK64-NEXT:    movl %edi, %ecx
-; CHECK64-NEXT:    shll $8, %ecx
-; CHECK64-NEXT:    shrl $8, %edi
-; CHECK64-NEXT:    movzwl %cx, %edx
-; CHECK64-NEXT:    movzbl %dil, %eax
-; CHECK64-NEXT:    andl $-16777216, %ecx # imm = 0xFF000000
-; CHECK64-NEXT:    andl $16711680, %edi # imm = 0xFF0000
-; CHECK64-NEXT:    orl %edx, %eax
-; CHECK64-NEXT:    orl %ecx, %edi
-; CHECK64-NEXT:    orl %edi, %eax
+; CHECK64-NEXT:    bswapl %edi
+; CHECK64-NEXT:    roll $16, %edi
+; CHECK64-NEXT:    movl %edi, %eax
 ; CHECK64-NEXT:    retq
   %byte1 = shl  i32 %x, 8
   %byte0 = lshr i32 %x, 8