Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3766,7 +3766,7 @@ EVT VT = N->getValueType(0); if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16) return SDValue(); - if (!TLI.isOperationLegal(ISD::BSWAP, VT)) + if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00) @@ -3878,27 +3878,38 @@ if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) return false; - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + SDValue N0 = N.getOperand(0); + unsigned Opc0 = N0.getOpcode(); + if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL) + return false; + + ConstantSDNode *N1C = nullptr; + if (Opc == ISD::AND) { + N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + } + else if (Opc0 == ISD::AND) { + // SHL or SRL: look upstream for AND mask operand + N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + } if (!N1C) return false; - unsigned Num; + unsigned mask; switch (N1C->getZExtValue()) { default: return false; - case 0xFF: Num = 0; break; - case 0xFF00: Num = 1; break; - case 0xFF0000: Num = 2; break; - case 0xFF000000: Num = 3; break; + case 0xFF: mask = 0; break; + case 0xFF00: mask = 1; break; + case 0xFF0000: mask = 2; break; + case 0xFF000000: mask = 3; break; } // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). 
- SDValue N0 = N.getOperand(0); if (Opc == ISD::AND) { - if (Num == 0 || Num == 2) { + if (mask == 0 || mask == 2) { // (x >> 8) & 0xff // (x >> 8) & 0xff0000 - if (N0.getOpcode() != ISD::SRL) + if (Opc0 != ISD::SRL) return false; ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); if (!C || C->getZExtValue() != 8) @@ -3906,7 +3917,7 @@ } else { // (x << 8) & 0xff00 // (x << 8) & 0xff000000 - if (N0.getOpcode() != ISD::SHL) + if (Opc0 != ISD::SHL) return false; ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); if (!C || C->getZExtValue() != 8) @@ -3915,7 +3926,7 @@ } else if (Opc == ISD::SHL) { // (x & 0xff) << 8 // (x & 0xff0000) << 8 - if (Num != 0 && Num != 2) + if (mask != 0 && mask != 2) return false; ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); if (!C || C->getZExtValue() != 8) @@ -3923,17 +3934,17 @@ } else { // Opc == ISD::SRL // (x & 0xff00) >> 8 // (x & 0xff000000) >> 8 - if (Num != 1 && Num != 3) + if (mask != 1 && mask != 3) return false; ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); if (!C || C->getZExtValue() != 8) return false; } - if (Parts[Num]) + if (Parts[mask]) return false; - Parts[Num] = N0.getOperand(0).getNode(); + Parts[mask] = N0.getOperand(0).getNode(); return true; } @@ -3950,7 +3961,7 @@ EVT VT = N->getValueType(0); if (VT != MVT::i32) return SDValue(); - if (!TLI.isOperationLegal(ISD::BSWAP, VT)) + if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); // Look for either @@ -3965,18 +3976,15 @@ if (N1.getOpcode() == ISD::OR && N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { // (or (or (and), (and)), (or (and), (and))) - SDValue N000 = N00.getOperand(0); - if (!isBSwapHWordElement(N000, Parts)) + if (!isBSwapHWordElement(N00, Parts)) return SDValue(); - - SDValue N001 = N00.getOperand(1); - if (!isBSwapHWordElement(N001, Parts)) + if (!isBSwapHWordElement(N01, Parts)) return SDValue(); - SDValue N010 = N01.getOperand(0); - if (!isBSwapHWordElement(N010, Parts)) + SDValue N10 = N1.getOperand(0); + if 
(!isBSwapHWordElement(N10, Parts)) return SDValue(); - SDValue N011 = N01.getOperand(1); - if (!isBSwapHWordElement(N011, Parts)) + SDValue N11 = N1.getOperand(1); + if (!isBSwapHWordElement(N11, Parts)) return SDValue(); } else { // (or (or (or (and), (and)), (and)), (and)) Index: test/CodeGen/X86/bswap_tree.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/bswap_tree.ll @@ -0,0 +1,71 @@ +; Check reconstructing bswap from shifted masks and tree of ORs + +; RUN: llc < %s -march=x86 -mcpu=i686 | FileCheck %s +; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK64 + +; Match a 32-bit packed halfword bswap. That is +; ((x & 0x000000ff) << 8) | +; ((x & 0x0000ff00) >> 8) | +; ((x & 0x00ff0000) << 8) | +; ((x & 0xff000000) >> 8) +; => (rotl (bswap x), 16) +define i32 @test1(i32 %x) { +; CHECK-LABEL: test1: +; CHECK: bswapl [[REG:%.*]] +; CHECK-NEXT: roll $16, [[REG]] +; CHECK-NEXT: retl + +; CHECK64-LABEL: test1: +; CHECK64: bswapl [[REG:%.*]] +; CHECK64-NEXT: roll $16, [[REG]] +; CHECK64: retq + + %byte0 = and i32 %x, 255 ; 0x000000ff + %byte1 = and i32 %x, 65280 ; 0x0000ff00 + %byte2 = and i32 %x, 16711680 ; 0x00ff0000 + %byte3 = and i32 %x, 4278190080 ; 0xff000000 + + %tmp0 = shl i32 %byte0, 8 + %tmp1 = lshr i32 %byte1, 8 + %tmp2 = shl i32 %byte2, 8 + %tmp3 = lshr i32 %byte3, 8 + + %or0 = or i32 %tmp0, %tmp1 + %or1 = or i32 %tmp2, %tmp3 + + %result = or i32 %or0, %or1 + ret i32 %result +} + +; the same as test1, just shifts before the "and" +; ((x << 8) & 0x0000ff00) | +; ((x >> 8) & 0x000000ff) | +; ((x << 8) & 0xff000000) | +; ((x >> 8) & 0x00ff0000) +define i32 @test2(i32 %x) { +; CHECK-LABEL: test2: +; CHECK: bswapl [[REG:%.*]] +; CHECK-NEXT: roll $16, [[REG]] +; CHECK-NEXT: retl + +; CHECK64-LABEL: test2: +; CHECK64: bswapl [[REG:%.*]] +; CHECK64-NEXT: roll $16, [[REG]] +; CHECK64: retq + + %byte1 = shl i32 %x, 8 + %byte0 = lshr i32 %x, 8 + %byte3 = shl i32 %x, 8 + %byte2 = lshr i32 %x, 8 + + 
%tmp1 = and i32 %byte1, 65280 ; 0x0000ff00 + %tmp0 = and i32 %byte0, 255 ; 0x000000ff + %tmp3 = and i32 %byte3, 4278190080 ; 0xff000000 + %tmp2 = and i32 %byte2, 16711680 ; 0x00ff0000 + + %or0 = or i32 %tmp0, %tmp1 + %or1 = or i32 %tmp2, %tmp3 + + %result = or i32 %or0, %or1 + ret i32 %result +} Index: test/CodeGen/X86/bswap_tree2.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/bswap_tree2.ll @@ -0,0 +1,80 @@ +; Check a few invalid patterns for halfword bswap pattern matching + +; RUN: llc < %s -march=x86 -mcpu=i686 | FileCheck %s +; RUN: llc < %s -march=x86-64 | FileCheck %s + +; Don't match a near-miss 32-bit packed halfword bswap +; (with only half of the swap tree valid). + define i32 @test1(i32 %x) { +; CHECK-LABEL: test1: +; CHECK: # BB#0: +; CHECK-NOT: bswap +; CHECK: shrl $8 +; CHECK-NEXT: bswapl +; CHECK-NEXT: shrl $16 +; CHECK-NOT: bswap + %byte0 = and i32 %x, 255 ; 0x000000ff + %byte1 = and i32 %x, 65280 ; 0x0000ff00 + %byte2 = and i32 %x, 16711680 ; 0x00ff0000 + %byte3 = or i32 %x, 4278190080 ; 0xff000000 + + %tmp0 = shl i32 %byte0, 8 + %tmp1 = lshr i32 %byte1, 8 + %tmp2 = shl i32 %byte2, 8 + %tmp3 = lshr i32 %byte3, 8 + + %or0 = or i32 %tmp0, %tmp1 + %or1 = or i32 %tmp2, %tmp3 + + %result = or i32 %or0, %or1 + ret i32 %result +} + +; Don't match a near-miss 32-bit packed halfword bswap +; (with swapped lshr/shl) +; ((x >> 8) & 0x0000ff00) | +; ((x << 8) & 0x000000ff) | +; ((x << 8) & 0xff000000) | +; ((x >> 8) & 0x00ff0000) +define i32 @test2(i32 %x) { +; CHECK-LABEL: test2: +; CHECK: # BB#0: +; CHECK-NOT: bswap + %byte1 = lshr i32 %x, 8 + %byte0 = shl i32 %x, 8 + %byte3 = shl i32 %x, 8 + %byte2 = lshr i32 %x, 8 + + %tmp1 = and i32 %byte1, 65280 ; 0x0000ff00 + %tmp0 = and i32 %byte0, 255 ; 0x000000ff + %tmp3 = and i32 %byte3, 4278190080 ; 0xff000000 + %tmp2 = and i32 %byte2, 16711680 ; 0x00ff0000 + + %or0 = or i32 %tmp0, %tmp1 + %or1 = or i32 %tmp2, %tmp3 + + %result = or i32 %or0, %or1 + ret 
i32 %result +} + +; Invalid pattern involving a unary op +define i32 @test3(float %x) { +; CHECK-LABEL: test3: +; CHECK: # BB#0: +; CHECK-NOT: bswap + %integer = fptosi float %x to i32 + %byte0 = shl i32 %integer, 8 + %byte3 = shl i32 %integer, 8 + %byte2 = lshr i32 %integer, 8 + + %tmp1 = and i32 %integer, 65280 ; 0x0000ff00 + %tmp0 = and i32 %byte0, 255 ; 0x000000ff + %tmp3 = and i32 %byte3, 4278190080 ; 0xff000000 + %tmp2 = and i32 %byte2, 16711680 ; 0x00ff0000 + + %or0 = or i32 %tmp0, %tmp1 + %or1 = or i32 %tmp2, %tmp3 + + %result = or i32 %or0, %or1 + ret i32 %result +}