Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7188,6 +7188,76 @@
   return SDValue();
 }
 
+/// If we have a shift-by-constant of a bitwise logic op that itself has a
+/// shift-by-constant operand, we may be able to convert that into 2 independent
+/// shifts followed by the logic op. This is a throughput improvement.
+///
+/// \p Shift must have a constant (or constant-splat) shift amount; this is
+/// asserted. The fold requires the logic op and the inner shift to be one-use,
+/// the inner shift to have the same opcode as \p Shift, and the combined shift
+/// amount to be less than the scalar bit width of the result type.
+///
+/// Returns the replacement logic node, or an empty SDValue if nothing matched.
+static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
+  // Match a one-use bitwise logic op.
+  SDValue LogicOp = Shift->getOperand(0);
+  if (!LogicOp.hasOneUse())
+    return SDValue();
+
+  unsigned LogicOpcode = LogicOp.getOpcode();
+  if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
+      LogicOpcode != ISD::XOR)
+    return SDValue();
+
+  // Find a matching one-use shift by constant.
+  // Logic ops are commutative, so check each operand for a match.
+  EVT VT = Shift->getValueType(0);
+  unsigned ShiftOpcode = Shift->getOpcode();
+  ConstantSDNode *ShiftAmtC = isConstOrConstSplat(Shift->getOperand(1));
+  assert(ShiftAmtC && "Expected a shift with constant operand");
+  const APInt &C1 = ShiftAmtC->getAPIntValue();
+  auto matchFirstShift = [&](SDValue V, const APInt *&C) {
+    if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
+      return false;
+
+    // There are no guarantees about the types of shift operands, so check that
+    // our constants are the same width before adding them.
+    ConstantSDNode *ShiftC = isConstOrConstSplat(V.getOperand(1));
+    if (!ShiftC || ShiftC->getAPIntValue().getBitWidth() != C1.getBitWidth())
+      return false;
+
+    // The fold is not valid unless the sum of the shift amounts is < bitwidth.
+    APInt ShiftSum = ShiftC->getAPIntValue() + C1;
+    unsigned BitWidth = VT.getScalarSizeInBits();
+    if (ShiftSum.getLimitedValue(BitWidth) >= BitWidth)
+      return false;
+
+    // Match complete - save the first shift amount.
+    C = &ShiftC->getAPIntValue();
+    return true;
+  };
+
+  SDValue X, Y;
+  const APInt *C0 = nullptr; // Set by matchFirstShift on success.
+  if (matchFirstShift(LogicOp.getOperand(0), C0)) {
+    X = LogicOp.getOperand(0).getOperand(0);
+    Y = LogicOp.getOperand(1);
+  } else if (matchFirstShift(LogicOp.getOperand(1), C0)) {
+    X = LogicOp.getOperand(1).getOperand(0);
+    Y = LogicOp.getOperand(0);
+  } else {
+    return SDValue();
+  }
+
+  // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
+  SDLoc DL(Shift);
+  EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
+  SDValue ShiftSum = DAG.getConstant(*C0 + C1, DL, ShiftAmtVT);
+  SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSum);
+  SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, Shift->getOperand(1));
+  return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
+}
+
 /// Handle transforms common to the three shifts, when the shift amount is a
 /// constant.
 /// We are looking for: (shift being one of shl/sra/srl)
@@ -7201,7 +7271,19 @@
 
   // The inner binop must be one-use, since we want to replace it.
   SDNode *LHS = N->getOperand(0).getNode();
-  if (!LHS->hasOneUse()) return SDValue();
+  if (!LHS->hasOneUse())
+    return SDValue();
+
+  if (!TLI.isDesirableToCommuteWithShift(N, Level))
+    return SDValue();
+
+  // TODO: This is limited to early combining because it may reveal regressions
+  // otherwise. But since we just checked a target hook to see if this is
+  // desirable, that should have filtered out cases where this interferes
+  // with some other pattern matching.
+  if (!LegalTypes)
+    if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
+      return R;
 
   // We want to pull some binops through shifts, so that we have (and (shift))
   // instead of (shift (and)), likewise for add, or, xor, etc.
This sort of Index: llvm/test/CodeGen/AArch64/bitfield-insert.ll =================================================================== --- llvm/test/CodeGen/AArch64/bitfield-insert.ll +++ llvm/test/CodeGen/AArch64/bitfield-insert.ll @@ -265,12 +265,12 @@ define i32 @test_nouseful_bits(i8 %a, i32 %b) { ; CHECK-LABEL: test_nouseful_bits: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: bfi w8, w8, #8, #24 -; CHECK-NEXT: mov w9, w0 -; CHECK-NEXT: bfi w9, w8, #8, #24 -; CHECK-NEXT: bfi w0, w9, #8, #24 -; CHECK-NEXT: lsl w0, w0, #8 +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: lsl w8, w8, #8 +; CHECK-NEXT: mov w9, w8 +; CHECK-NEXT: bfxil w9, w0, #0, #8 +; CHECK-NEXT: bfi w8, w9, #16, #16 +; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret %conv = zext i8 %a to i32 ; 0 0 0 A %shl = shl i32 %b, 8 ; B2 B1 B0 0 Index: llvm/test/CodeGen/AArch64/shift-logic.ll =================================================================== --- llvm/test/CodeGen/AArch64/shift-logic.ll +++ llvm/test/CodeGen/AArch64/shift-logic.ll @@ -4,8 +4,8 @@ define i32 @shl_and(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: shl_and: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, w0, lsl #5 -; CHECK-NEXT: lsl w0, w8, #7 +; CHECK-NEXT: lsl w8, w0, #12 +; CHECK-NEXT: and w0, w8, w1, lsl #7 ; CHECK-NEXT: ret %sh0 = shl i32 %x, 5 %r = and i32 %sh0, %y @@ -16,8 +16,8 @@ define i32 @shl_or(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: shl_or: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w1, w0, lsl #5 -; CHECK-NEXT: lsl w0, w8, #7 +; CHECK-NEXT: lsl w8, w0, #12 +; CHECK-NEXT: orr w0, w8, w1, lsl #7 ; CHECK-NEXT: ret %sh0 = shl i32 %x, 5 %r = or i32 %y, %sh0 @@ -28,8 +28,8 @@ define i32 @shl_xor(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: shl_xor: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w1, w0, lsl #5 -; CHECK-NEXT: lsl w0, w8, #7 +; CHECK-NEXT: lsl w8, w0, #12 +; CHECK-NEXT: eor w0, w8, w1, lsl #7 ; CHECK-NEXT: ret %sh0 = shl i32 %x, 5 %r = xor i32 %sh0, %y @@ -40,8 +40,8 @@ define i32 @lshr_and(i32 %x, i32 %y) nounwind 
{ ; CHECK-LABEL: lshr_and: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, w0, lsr #5 -; CHECK-NEXT: lsr w0, w8, #7 +; CHECK-NEXT: lsr w8, w0, #12 +; CHECK-NEXT: and w0, w8, w1, lsr #7 ; CHECK-NEXT: ret %sh0 = lshr i32 %x, 5 %r = and i32 %y, %sh0 @@ -52,8 +52,8 @@ define i32 @lshr_or(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: lshr_or: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w1, w0, lsr #5 -; CHECK-NEXT: lsr w0, w8, #7 +; CHECK-NEXT: lsr w8, w0, #12 +; CHECK-NEXT: orr w0, w8, w1, lsr #7 ; CHECK-NEXT: ret %sh0 = lshr i32 %x, 5 %r = or i32 %sh0, %y @@ -64,8 +64,8 @@ define i32 @lshr_xor(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: lshr_xor: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w1, w0, lsr #5 -; CHECK-NEXT: lsr w0, w8, #7 +; CHECK-NEXT: lsr w8, w0, #12 +; CHECK-NEXT: eor w0, w8, w1, lsr #7 ; CHECK-NEXT: ret %sh0 = lshr i32 %x, 5 %r = xor i32 %y, %sh0 @@ -77,8 +77,8 @@ define i32 @ashr_and(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: ashr_and: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, w0, asr #5 -; CHECK-NEXT: asr w0, w8, #7 +; CHECK-NEXT: asr w8, w0, #12 +; CHECK-NEXT: and w0, w8, w1, asr #7 ; CHECK-NEXT: ret %sh0 = ashr i32 %x, 5 %r = and i32 %y, %sh0 @@ -89,8 +89,8 @@ define i32 @ashr_or(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: ashr_or: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w1, w0, asr #5 -; CHECK-NEXT: asr w0, w8, #7 +; CHECK-NEXT: asr w8, w0, #12 +; CHECK-NEXT: orr w0, w8, w1, asr #7 ; CHECK-NEXT: ret %sh0 = ashr i32 %x, 5 %r = or i32 %sh0, %y @@ -101,8 +101,8 @@ define i32 @ashr_xor(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: ashr_xor: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w1, w0, asr #5 -; CHECK-NEXT: asr w0, w8, #7 +; CHECK-NEXT: asr w8, w0, #12 +; CHECK-NEXT: eor w0, w8, w1, asr #7 ; CHECK-NEXT: ret %sh0 = ashr i32 %x, 5 %r = xor i32 %y, %sh0 Index: llvm/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll =================================================================== --- llvm/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll +++ 
llvm/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll @@ -15,10 +15,10 @@ ; Make sure the cmp is not scheduled before the InlineAsm that clobbers cc. ; CHECK: bl _f2 -; CHECK: cmp {{r[0-9]+}}, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: addeq {{r[0-9]+}}, #1 -; CHECK-NEXT: lsls +; CHECK: clz {{r[0-9]+}} +; CHECK-DAG: lsrs {{r[0-9]+}} +; CHECK-DAG: lsls {{r[0-9]+}} +; CHECK-NEXT: orr.w {{r[0-9]+}} ; CHECK-NEXT: InlineAsm Start define void @test(%s1* %this, i32 %format, i32 %w, i32 %h, i32 %levels, i32* %s, i8* %data, i32* nocapture %rowbytes, void (i8*, i8*)* %release, i8* %info) nounwind { entry: Index: llvm/test/CodeGen/X86/shift-logic.ll =================================================================== --- llvm/test/CodeGen/X86/shift-logic.ll +++ llvm/test/CodeGen/X86/shift-logic.ll @@ -5,9 +5,9 @@ ; CHECK-LABEL: shl_and: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: shll $5, %eax +; CHECK-NEXT: shll $7, %esi +; CHECK-NEXT: shll $12, %eax ; CHECK-NEXT: andl %esi, %eax -; CHECK-NEXT: shll $7, %eax ; CHECK-NEXT: retq %sh0 = shl i32 %x, 5 %r = and i32 %sh0, %y @@ -19,9 +19,9 @@ ; CHECK-LABEL: shl_or: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: shll $5, %eax +; CHECK-NEXT: shll $7, %esi +; CHECK-NEXT: shll $12, %eax ; CHECK-NEXT: orl %esi, %eax -; CHECK-NEXT: shll $7, %eax ; CHECK-NEXT: retq %sh0 = shl i32 %x, 5 %r = or i32 %y, %sh0 @@ -33,9 +33,9 @@ ; CHECK-LABEL: shl_xor: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: shll $5, %eax +; CHECK-NEXT: shll $7, %esi +; CHECK-NEXT: shll $12, %eax ; CHECK-NEXT: xorl %esi, %eax -; CHECK-NEXT: shll $7, %eax ; CHECK-NEXT: retq %sh0 = shl i32 %x, 5 %r = xor i32 %sh0, %y @@ -47,9 +47,9 @@ ; CHECK-LABEL: lshr_and: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: shrl $5, %eax +; CHECK-NEXT: shrl $7, %esi +; CHECK-NEXT: shrl $12, %eax ; CHECK-NEXT: andl %esi, %eax -; CHECK-NEXT: shrl $7, %eax ; CHECK-NEXT: retq %sh0 = lshr i32 %x, 5 %r = and i32 %y, %sh0 @@ -61,9 +61,9 
@@ ; CHECK-LABEL: lshr_or: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: shrl $5, %eax +; CHECK-NEXT: shrl $7, %esi +; CHECK-NEXT: shrl $12, %eax ; CHECK-NEXT: orl %esi, %eax -; CHECK-NEXT: shrl $7, %eax ; CHECK-NEXT: retq %sh0 = lshr i32 %x, 5 %r = or i32 %sh0, %y @@ -75,9 +75,9 @@ ; CHECK-LABEL: lshr_xor: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: shrl $5, %eax +; CHECK-NEXT: shrl $7, %esi +; CHECK-NEXT: shrl $12, %eax ; CHECK-NEXT: xorl %esi, %eax -; CHECK-NEXT: shrl $7, %eax ; CHECK-NEXT: retq %sh0 = lshr i32 %x, 5 %r = xor i32 %y, %sh0 @@ -90,9 +90,9 @@ ; CHECK-LABEL: ashr_and: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: sarl $5, %eax +; CHECK-NEXT: sarl $7, %esi +; CHECK-NEXT: sarl $12, %eax ; CHECK-NEXT: andl %esi, %eax -; CHECK-NEXT: sarl $7, %eax ; CHECK-NEXT: retq %sh0 = ashr i32 %x, 5 %r = and i32 %y, %sh0 @@ -104,9 +104,9 @@ ; CHECK-LABEL: ashr_or: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: sarl $5, %eax +; CHECK-NEXT: sarl $7, %esi +; CHECK-NEXT: sarl $12, %eax ; CHECK-NEXT: orl %esi, %eax -; CHECK-NEXT: sarl $7, %eax ; CHECK-NEXT: retq %sh0 = ashr i32 %x, 5 %r = or i32 %sh0, %y @@ -118,9 +118,9 @@ ; CHECK-LABEL: ashr_xor: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: sarl $5, %eax +; CHECK-NEXT: sarl $7, %esi +; CHECK-NEXT: sarl $12, %eax ; CHECK-NEXT: xorl %esi, %eax -; CHECK-NEXT: sarl $7, %eax ; CHECK-NEXT: retq %sh0 = ashr i32 %x, 5 %r = xor i32 %y, %sh0 @@ -171,4 +171,3 @@ %sh1 = lshr i32 %r, 7 ret i32 %sh1 } -