diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6152,6 +6152,43 @@ return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z); } +/// Given a tree of logic operations with shape like +/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) +/// try to match and fold shift operations with the same shift amount. +/// For example: +/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) --> +/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W) +static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, + SDValue RightHand, SelectionDAG &DAG) { + unsigned LogicOpcode = N->getOpcode(); + assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR || + LogicOpcode == ISD::XOR)); + if (LeftHand.getOpcode() != LogicOpcode || + RightHand.getOpcode() != LogicOpcode) + return SDValue(); + if (!LeftHand.hasOneUse() || !RightHand.hasOneUse()) + return SDValue(); + + // Try to match one of following patterns: + // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) + // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y)) + // Note that foldLogicOfShifts will handle commuted versions of the left hand + // itself. + SDValue CombinedShifts, W; + SDValue R0 = RightHand.getOperand(0); + SDValue R1 = RightHand.getOperand(1); + if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG))) + W = R1; + else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG))) + W = R0; + else + return SDValue(); + + EVT VT = N->getValueType(0); + SDLoc DL(N); + return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W); +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -6524,6 +6561,12 @@ if (SDValue V = foldAndToUsubsat(N, DAG)) return V; + // Postpone until legalization completed to avoid interference with bswap + // folding + if (LegalOperations || VT.isVector()) + if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG)) + return R; + return SDValue(); } @@ -7124,6 +7167,12 @@ if (SDValue Combined = visitADDLike(N)) return Combined; + // Postpone until legalization completed to avoid interference with bswap + // folding + if (LegalOperations || VT.isVector()) + if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG)) + return R; + return SDValue(); } @@ -8608,6 +8657,8 @@ return R; if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG)) return R; + if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG)) + return R; // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable if (SDValue MM = unfoldMaskedMerge(N)) diff --git a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll --- a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll +++ b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll @@ -139,15 +139,13 @@ define i1 @opt_setcc_shl_ne_zero_i128(i128 %a) nounwind { ; CHECK-LABEL: opt_setcc_shl_ne_zero_i128: ; CHECK: @ %bb.0: -; CHECK-NEXT: lsl r3, r3, #17 -; CHECK-NEXT: orr r12, r3, r2, lsr #15 -; CHECK-NEXT: lsl r3, r1, #17 -; CHECK-NEXT: orr r3, r3, r0, lsr #15 +; CHECK-NEXT: orr r3, r1, r3 ; CHECK-NEXT: orr r0, r2, r0 -; CHECK-NEXT: orr r3, r3, r12 -; CHECK-NEXT: lsl r0, r0, #17 -; CHECK-NEXT: orr r0, r0, r1, lsr #15 -; CHECK-NEXT: orrs r0, r0, r3 +; CHECK-NEXT: orr r2, r0, r3 +; CHECK-NEXT: orr r0, r0, r1 +; CHECK-NEXT: lsr r0, r0, #15 +; CHECK-NEXT: orr r0, r0, r2, lsl #17 +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: movwne r0, #1 ; CHECK-NEXT: bx lr %shl = shl i128 %a, 17 diff --git 
a/llvm/test/CodeGen/ARM/shift-combine.ll b/llvm/test/CodeGen/ARM/shift-combine.ll --- a/llvm/test/CodeGen/ARM/shift-combine.ll +++ b/llvm/test/CodeGen/ARM/shift-combine.ll @@ -893,3 +893,388 @@ %cmp.i = icmp ugt i32 %bf.cast.i, %AttrArgNo ret i1 %cmp.i } + +define i64 @or_tree_with_shifts_i64(i64 %a, i64 %b, i64 %c, i64 %d) { +; CHECK-ARM-LABEL: or_tree_with_shifts_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: ldr lr, [sp, #16] +; CHECK-ARM-NEXT: orr r0, r0, r2, lsl #16 +; CHECK-ARM-NEXT: ldr r12, [sp, #8] +; CHECK-ARM-NEXT: orr r3, lr, r3 +; CHECK-ARM-NEXT: orr r1, r1, r3, lsl #16 +; CHECK-ARM-NEXT: orr r1, r1, r2, lsr #16 +; CHECK-ARM-NEXT: orr r1, r1, r12 +; CHECK-ARM-NEXT: pop {r11, pc} +; +; CHECK-BE-LABEL: or_tree_with_shifts_i64: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: .save {r11, lr} +; CHECK-BE-NEXT: push {r11, lr} +; CHECK-BE-NEXT: ldr lr, [sp, #20] +; CHECK-BE-NEXT: orr r1, r1, r3, lsl #16 +; CHECK-BE-NEXT: ldr r12, [sp, #12] +; CHECK-BE-NEXT: orr r2, lr, r2 +; CHECK-BE-NEXT: orr r0, r0, r2, lsl #16 +; CHECK-BE-NEXT: orr r0, r0, r3, lsr #16 +; CHECK-BE-NEXT: orr r0, r0, r12 +; CHECK-BE-NEXT: pop {r11, pc} +; +; CHECK-ALIGN-LABEL: or_tree_with_shifts_i64: +; CHECK-ALIGN: @ %bb.0: +; CHECK-ALIGN-NEXT: ldr.w r12, [sp, #8] +; CHECK-ALIGN-NEXT: orr.w r0, r0, r2, lsl #16 +; CHECK-ALIGN-NEXT: orr.w r3, r3, r12 +; CHECK-ALIGN-NEXT: orr.w r1, r1, r3, lsl #16 +; CHECK-ALIGN-NEXT: orr.w r1, r1, r2, lsr #16 +; CHECK-ALIGN-NEXT: ldr r2, [sp] +; CHECK-ALIGN-NEXT: orrs r1, r2 +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: or_tree_with_shifts_i64: +; CHECK-V6M: @ %bb.0: +; CHECK-V6M-NEXT: push {r4, lr} +; CHECK-V6M-NEXT: lsls r4, r2, #16 +; CHECK-V6M-NEXT: orrs r0, r4 +; CHECK-V6M-NEXT: ldr r4, [sp, #16] +; CHECK-V6M-NEXT: orrs r4, r3 +; CHECK-V6M-NEXT: lsls r3, r4, #16 +; CHECK-V6M-NEXT: orrs r1, r3 +; CHECK-V6M-NEXT: lsrs r2, r2, #16 +; CHECK-V6M-NEXT: orrs r1, r2 +; CHECK-V6M-NEXT: ldr r2, [sp, #8] +; CHECK-V6M-NEXT: orrs r1, r2 +; CHECK-V6M-NEXT: pop {r4, pc} + %b.shifted = shl i64 %b, 16 + %c.shifted = shl i64 %c, 32 + %d.shifted = shl i64 %d, 48 + %or.ad = or i64 %a, %d.shifted + %or.adb = or i64 %or.ad, %b.shifted + %or.adbc = or i64 %or.adb, %c.shifted + ret i64 %or.adbc +} + +define i32 @or_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-ARM-LABEL: or_tree_with_shifts_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: orr r0, r0, r2 +; CHECK-ARM-NEXT: orr r0, r1, r0, lsl #16 +; CHECK-ARM-NEXT: orr r0, r0, r3 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-BE-LABEL: or_tree_with_shifts_i32: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: orr r0, r0, r2 +; CHECK-BE-NEXT: orr r0, r1, r0, lsl #16 +; CHECK-BE-NEXT: orr r0, r0, r3 +; CHECK-BE-NEXT: bx lr +; +; CHECK-THUMB-LABEL: or_tree_with_shifts_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: orrs r0, r2 +; CHECK-THUMB-NEXT: orr.w r0, r1, r0, lsl #16 +; CHECK-THUMB-NEXT: orrs r0, r3 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: or_tree_with_shifts_i32: +; CHECK-ALIGN: @ %bb.0: +; CHECK-ALIGN-NEXT: orrs r0, r2 +; CHECK-ALIGN-NEXT: orr.w r0, r1, r0, lsl #16 +; CHECK-ALIGN-NEXT: orrs r0, r3 +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: or_tree_with_shifts_i32: +; CHECK-V6M: @ %bb.0: +; CHECK-V6M-NEXT: orrs r0, r2 +; CHECK-V6M-NEXT: lsls r0, r0, #16 +; CHECK-V6M-NEXT: orrs r0, r1 +; CHECK-V6M-NEXT: orrs r0, r3 +; CHECK-V6M-NEXT: bx lr + %a.shifted = shl i32 %a, 16 + %c.shifted = shl i32 %c, 16 + %or.ab = or i32 %a.shifted, %b + %or.cd = or i32 %c.shifted, %d + %r = 
or i32 %or.ab, %or.cd + ret i32 %r +} + +define i32 @xor_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-ARM-LABEL: xor_tree_with_shifts_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: eor r0, r0, r2 +; CHECK-ARM-NEXT: eor r0, r1, r0, lsr #16 +; CHECK-ARM-NEXT: eor r0, r0, r3 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-BE-LABEL: xor_tree_with_shifts_i32: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: eor r0, r0, r2 +; CHECK-BE-NEXT: eor r0, r1, r0, lsr #16 +; CHECK-BE-NEXT: eor r0, r0, r3 +; CHECK-BE-NEXT: bx lr +; +; CHECK-THUMB-LABEL: xor_tree_with_shifts_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: eor.w r0, r1, r0, lsr #16 +; CHECK-THUMB-NEXT: eors r0, r3 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: xor_tree_with_shifts_i32: +; CHECK-ALIGN: @ %bb.0: +; CHECK-ALIGN-NEXT: eors r0, r2 +; CHECK-ALIGN-NEXT: eor.w r0, r1, r0, lsr #16 +; CHECK-ALIGN-NEXT: eors r0, r3 +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: xor_tree_with_shifts_i32: +; CHECK-V6M: @ %bb.0: +; CHECK-V6M-NEXT: eors r0, r2 +; CHECK-V6M-NEXT: lsrs r0, r0, #16 +; CHECK-V6M-NEXT: eors r0, r1 +; CHECK-V6M-NEXT: eors r0, r3 +; CHECK-V6M-NEXT: bx lr + %a.shifted = lshr i32 %a, 16 + %c.shifted = lshr i32 %c, 16 + %xor.ab = xor i32 %a.shifted, %b + %xor.cd = xor i32 %c.shifted, %d + %r = xor i32 %xor.ab, %xor.cd + ret i32 %r +} + +define i32 @and_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-ARM-LABEL: and_tree_with_shifts_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: eor r0, r0, r2 +; CHECK-ARM-NEXT: eor r0, r1, r0, asr #16 +; CHECK-ARM-NEXT: eor r0, r0, r3 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-BE-LABEL: and_tree_with_shifts_i32: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: eor r0, r0, r2 +; CHECK-BE-NEXT: eor r0, r1, r0, asr #16 +; CHECK-BE-NEXT: eor r0, r0, r3 +; CHECK-BE-NEXT: bx lr +; +; CHECK-THUMB-LABEL: and_tree_with_shifts_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: eor.w r0, r1, r0, asr #16 +; CHECK-THUMB-NEXT: eors r0, r3 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: and_tree_with_shifts_i32: +; CHECK-ALIGN: @ %bb.0: +; CHECK-ALIGN-NEXT: eors r0, r2 +; CHECK-ALIGN-NEXT: eor.w r0, r1, r0, asr #16 +; CHECK-ALIGN-NEXT: eors r0, r3 +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: and_tree_with_shifts_i32: +; CHECK-V6M: @ %bb.0: +; CHECK-V6M-NEXT: eors r0, r2 +; CHECK-V6M-NEXT: asrs r0, r0, #16 +; CHECK-V6M-NEXT: eors r0, r1 +; CHECK-V6M-NEXT: eors r0, r3 +; CHECK-V6M-NEXT: bx lr + %a.shifted = ashr i32 %a, 16 + %c.shifted = ashr i32 %c, 16 + %and.ab = xor i32 %a.shifted, %b + %and.cd = xor i32 %c.shifted, %d + %r = xor i32 %and.ab, %and.cd + ret i32 %r +} + +define i32 @logic_tree_with_shifts_var_i32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %s) { +; CHECK-ARM-LABEL: logic_tree_with_shifts_var_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: ldr r12, [sp] +; CHECK-ARM-NEXT: orr r0, r0, r2 +; CHECK-ARM-NEXT: orr r0, r1, r0, lsl r12 +; CHECK-ARM-NEXT: orr r0, r0, r3 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-BE-LABEL: logic_tree_with_shifts_var_i32: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: ldr r12, [sp] +; CHECK-BE-NEXT: orr r0, r0, r2 +; CHECK-BE-NEXT: orr r0, r1, r0, lsl r12 +; CHECK-BE-NEXT: orr r0, r0, r3 +; CHECK-BE-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: logic_tree_with_shifts_var_i32: +; CHECK-ALIGN: @ %bb.0: +; CHECK-ALIGN-NEXT: orrs r0, r2 +; CHECK-ALIGN-NEXT: ldr r2, [sp] +; CHECK-ALIGN-NEXT: lsls r0, r2 +; CHECK-ALIGN-NEXT: orrs r0, r1 +; CHECK-ALIGN-NEXT: orrs r0, r3 +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: 
logic_tree_with_shifts_var_i32: +; CHECK-V6M: @ %bb.0: +; CHECK-V6M-NEXT: orrs r0, r2 +; CHECK-V6M-NEXT: ldr r2, [sp] +; CHECK-V6M-NEXT: lsls r0, r2 +; CHECK-V6M-NEXT: orrs r0, r1 +; CHECK-V6M-NEXT: orrs r0, r3 +; CHECK-V6M-NEXT: bx lr + %a.shifted = shl i32 %a, %s + %c.shifted = shl i32 %c, %s + %or.ab = or i32 %a.shifted, %b + %or.cd = or i32 %c.shifted, %d + %r = or i32 %or.ab, %or.cd + ret i32 %r +} + +define i32 @logic_tree_with_mismatching_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-ARM-LABEL: logic_tree_with_mismatching_shifts_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: orr r2, r3, r2, lsl #16 +; CHECK-ARM-NEXT: orr r0, r1, r0, lsl #15 +; CHECK-ARM-NEXT: orr r0, r0, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-BE-LABEL: logic_tree_with_mismatching_shifts_i32: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: orr r2, r3, r2, lsl #16 +; CHECK-BE-NEXT: orr r0, r1, r0, lsl #15 +; CHECK-BE-NEXT: orr r0, r0, r2 +; CHECK-BE-NEXT: bx lr +; +; CHECK-THUMB-LABEL: logic_tree_with_mismatching_shifts_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: orr.w r2, r3, r2, lsl #16 +; CHECK-THUMB-NEXT: orr.w r0, r1, r0, lsl #15 +; CHECK-THUMB-NEXT: orrs r0, r2 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: logic_tree_with_mismatching_shifts_i32: +; CHECK-ALIGN: @ %bb.0: +; CHECK-ALIGN-NEXT: orr.w r2, r3, r2, lsl #16 +; CHECK-ALIGN-NEXT: orr.w r0, r1, r0, lsl #15 +; CHECK-ALIGN-NEXT: orrs r0, r2 +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: logic_tree_with_mismatching_shifts_i32: +; CHECK-V6M: @ %bb.0: +; CHECK-V6M-NEXT: lsls r2, r2, #16 +; CHECK-V6M-NEXT: orrs r2, r3 +; CHECK-V6M-NEXT: lsls r0, r0, #15 +; CHECK-V6M-NEXT: orrs r0, r1 +; CHECK-V6M-NEXT: orrs r0, r2 +; CHECK-V6M-NEXT: bx lr + %a.shifted = shl i32 %a, 15 + %c.shifted = shl i32 %c, 16 + %or.ab = or i32 %a.shifted, %b + %or.cd = or i32 %c.shifted, %d + %r = or i32 %or.ab, %or.cd + ret i32 %r +} + +define i32 @logic_tree_with_mismatching_shifts2_i32(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-ARM-LABEL: logic_tree_with_mismatching_shifts2_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: orr r2, r3, r2, lsr #16 +; CHECK-ARM-NEXT: orr r0, r1, r0, lsl #16 +; CHECK-ARM-NEXT: orr r0, r0, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-BE-LABEL: logic_tree_with_mismatching_shifts2_i32: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: orr r2, r3, r2, lsr #16 +; CHECK-BE-NEXT: orr r0, r1, r0, lsl #16 +; CHECK-BE-NEXT: orr r0, r0, r2 +; CHECK-BE-NEXT: bx lr +; +; CHECK-THUMB-LABEL: logic_tree_with_mismatching_shifts2_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: orr.w r2, r3, r2, lsr #16 +; CHECK-THUMB-NEXT: orr.w r0, r1, r0, lsl #16 +; CHECK-THUMB-NEXT: orrs r0, r2 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: logic_tree_with_mismatching_shifts2_i32: +; CHECK-ALIGN: @ %bb.0: +; CHECK-ALIGN-NEXT: orr.w r2, r3, r2, lsr #16 +; CHECK-ALIGN-NEXT: orr.w r0, r1, r0, lsl #16 +; CHECK-ALIGN-NEXT: orrs r0, r2 +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: logic_tree_with_mismatching_shifts2_i32: +; CHECK-V6M: @ %bb.0: +; CHECK-V6M-NEXT: lsrs r2, r2, #16 +; CHECK-V6M-NEXT: orrs r2, r3 +; CHECK-V6M-NEXT: lsls r0, r0, #16 +; CHECK-V6M-NEXT: orrs r0, r1 +; CHECK-V6M-NEXT: orrs r0, r2 +; CHECK-V6M-NEXT: bx lr + %a.shifted = shl i32 %a, 16 + %c.shifted = lshr i32 %c, 16 + %or.ab = or i32 %a.shifted, %b + %or.cd = or i32 %c.shifted, %d + %r = or i32 %or.ab, %or.cd + ret i32 %r +} + +define <4 x i32> @or_tree_with_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) { +; CHECK-ARM-LABEL: or_tree_with_shifts_vec_i32: +; CHECK-ARM: @ %bb.0: +; 
CHECK-ARM-NEXT: vorr q8, q0, q2
+; CHECK-ARM-NEXT: vshl.i32 q8, q8, #16
+; CHECK-ARM-NEXT: vorr q8, q8, q1
+; CHECK-ARM-NEXT: vorr q0, q8, q3
+; CHECK-ARM-NEXT: bx lr
+;
+; CHECK-BE-LABEL: or_tree_with_shifts_vec_i32:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: vrev64.32 q8, q2
+; CHECK-BE-NEXT: vrev64.32 q9, q0
+; CHECK-BE-NEXT: vorr q8, q9, q8
+; CHECK-BE-NEXT: vrev64.32 q9, q1
+; CHECK-BE-NEXT: vrev64.32 q10, q3
+; CHECK-BE-NEXT: vshl.i32 q8, q8, #16
+; CHECK-BE-NEXT: vorr q8, q8, q9
+; CHECK-BE-NEXT: vorr q8, q8, q10
+; CHECK-BE-NEXT: vrev64.32 q0, q8
+; CHECK-BE-NEXT: bx lr
+ %a.shifted = shl <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
+ %c.shifted = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
+ %or.ab = or <4 x i32> %a.shifted, %b
+ %or.cd = or <4 x i32> %c.shifted, %d
+ %r = or <4 x i32> %or.ab, %or.cd
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @or_tree_with_mismatching_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
+; CHECK-ARM-LABEL: or_tree_with_mismatching_shifts_vec_i32:
+; CHECK-ARM: @ %bb.0:
+; CHECK-ARM-NEXT: vshl.i32 q8, q2, #17
+; CHECK-ARM-NEXT: vshl.i32 q9, q0, #16
+; CHECK-ARM-NEXT: vorr q8, q8, q3
+; CHECK-ARM-NEXT: vorr q9, q9, q1
+; CHECK-ARM-NEXT: vorr q0, q9, q8
+; CHECK-ARM-NEXT: bx lr
+;
+; CHECK-BE-LABEL: or_tree_with_mismatching_shifts_vec_i32:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: vrev64.32 q8, q2
+; CHECK-BE-NEXT: vrev64.32 q9, q0
+; CHECK-BE-NEXT: vshl.i32 q8, q8, #17
+; CHECK-BE-NEXT: vrev64.32 q10, q3
+; CHECK-BE-NEXT: vshl.i32 q9, q9, #16
+; CHECK-BE-NEXT: vrev64.32 q11, q1
+; CHECK-BE-NEXT: vorr q8, q8, q10
+; CHECK-BE-NEXT: vorr q9, q9, q11
+; CHECK-BE-NEXT: vorr q8, q9, q8
+; CHECK-BE-NEXT: vrev64.32 q0, q8
+; CHECK-BE-NEXT: bx lr
+ %a.shifted = shl <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
+ %c.shifted = shl <4 x i32> %c, <i32 17, i32 17, i32 17, i32 17>
+ %or.ab = or <4 x i32> %a.shifted, %b
+ %or.cd = or <4 x i32> %c.shifted, %d
+ %r = or <4 x i32> %or.ab, %or.cd
+ ret <4 x i32> %r
+}
diff --git a/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll b/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll
--- a/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll
@@ -8,90 +8,91 @@
 define i32 @SplitPromoteVectorTest(i32 %Opc) align 2 {
 ; CHECK-LABEL: SplitPromoteVectorTest:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: plxv v3, .LCPI0_0@PCREL(0), 1
-; CHECK-NEXT: mtvsrws v2, r3
-; CHECK-NEXT: li r5, 4
+; CHECK-NEXT: plxv v2, .LCPI0_0@PCREL(0), 1
+; CHECK-NEXT: plxv v4, .LCPI0_1@PCREL(0), 1
+; CHECK-NEXT: mtvsrws v3, r3
+; CHECK-NEXT: li r5, 12
 ; CHECK-NEXT: li r8, 0
-; CHECK-NEXT: vcmpequw v3, v2, v3
-; CHECK-NEXT: vextubrx r6, r5, v3
-; CHECK-NEXT: vextubrx r4, r8, v3
-; CHECK-NEXT: rlwimi r4, r6, 1, 30, 30
-; CHECK-NEXT: li r6, 8
-; CHECK-NEXT: vextubrx r7, r6, v3
-; CHECK-NEXT: rlwimi r4, r7, 2, 29, 29
-; CHECK-NEXT: li r7, 12
-; CHECK-NEXT: vextubrx r9, r7, v3
-; CHECK-NEXT: plxv v3, .LCPI0_1@PCREL(0), 1
-; CHECK-NEXT: rlwimi r4, r9, 3, 28, 28
-; CHECK-NEXT: vcmpequw v3, v2, v3
-; CHECK-NEXT: vextubrx r9, r8, v3
-; CHECK-NEXT: rlwimi r4, r9, 4, 27, 27
-; CHECK-NEXT: vextubrx r9, r5, v3
-; CHECK-NEXT: rlwimi r4, r9, 5, 26, 26
-; CHECK-NEXT: vextubrx r9, r6, v3
-; CHECK-NEXT: rlwimi r4, r9, 6, 25, 25
-; CHECK-NEXT: vextubrx r9, r7, v3
-; CHECK-NEXT: plxv v3, .LCPI0_2@PCREL(0), 1
-; CHECK-NEXT: rlwimi r4, r9, 7, 24, 24
-; CHECK-NEXT: vcmpequw v3, v2, v3
-; CHECK-NEXT: vextubrx r9, r8, v3
-; CHECK-NEXT: rlwimi r4, r9, 8, 23, 23
-; CHECK-NEXT: vextubrx r9, r5, v3
-; CHECK-NEXT: rlwimi r4, r9, 9, 22, 22
-; CHECK-NEXT: vextubrx r9, r6, v3
-; CHECK-NEXT: rlwimi r4, r9, 10, 
21, 21 -; CHECK-NEXT: vextubrx r9, r7, v3 -; CHECK-NEXT: plxv v3, .LCPI0_3@PCREL(0), 1 -; CHECK-NEXT: rlwimi r4, r9, 11, 20, 20 -; CHECK-NEXT: vcmpequw v3, v2, v3 +; CHECK-NEXT: vcmpequw v2, v3, v2 +; CHECK-NEXT: plxv v5, .LCPI0_2@PCREL(0), 1 +; CHECK-NEXT: vcmpequw v4, v3, v4 +; CHECK-NEXT: vcmpequw v5, v3, v5 +; CHECK-NEXT: vextubrx r4, r5, v2 +; CHECK-NEXT: vextubrx r6, r5, v4 +; CHECK-NEXT: or r9, r6, r4 +; CHECK-NEXT: li r6, 4 +; CHECK-NEXT: vextubrx r4, r8, v5 +; CHECK-NEXT: vextubrx r7, r6, v5 +; CHECK-NEXT: rlwimi r4, r7, 1, 30, 30 +; CHECK-NEXT: li r7, 8 +; CHECK-NEXT: vextubrx r10, r7, v5 +; CHECK-NEXT: rlwimi r4, r10, 2, 29, 29 +; CHECK-NEXT: vextubrx r10, r5, v5 +; CHECK-NEXT: plxv v5, .LCPI0_3@PCREL(0), 1 +; CHECK-NEXT: rlwimi r4, r10, 3, 28, 28 +; CHECK-NEXT: vcmpequw v5, v3, v5 +; CHECK-NEXT: vextubrx r10, r8, v5 +; CHECK-NEXT: rlwimi r4, r10, 4, 27, 27 +; CHECK-NEXT: vextubrx r10, r6, v5 +; CHECK-NEXT: rlwimi r4, r10, 5, 26, 26 +; CHECK-NEXT: vextubrx r10, r7, v5 +; CHECK-NEXT: rlwimi r4, r10, 6, 25, 25 +; CHECK-NEXT: vextubrx r10, r5, v5 +; CHECK-NEXT: plxv v5, .LCPI0_4@PCREL(0), 1 +; CHECK-NEXT: rlwimi r4, r10, 7, 24, 24 +; CHECK-NEXT: vcmpequw v5, v3, v5 +; CHECK-NEXT: vextubrx r10, r8, v5 +; CHECK-NEXT: rlwimi r4, r10, 8, 23, 23 +; CHECK-NEXT: vextubrx r10, r6, v5 +; CHECK-NEXT: rlwimi r4, r10, 9, 22, 22 +; CHECK-NEXT: vextubrx r10, r7, v5 +; CHECK-NEXT: rlwimi r4, r10, 10, 21, 21 +; CHECK-NEXT: vextubrx r10, r5, v5 +; CHECK-NEXT: rlwimi r4, r10, 11, 20, 20 +; CHECK-NEXT: vextubrx r10, r8, v4 +; CHECK-NEXT: rlwimi r4, r10, 12, 19, 19 +; CHECK-NEXT: vextubrx r10, r6, v4 +; CHECK-NEXT: rlwimi r4, r10, 13, 18, 18 +; CHECK-NEXT: vextubrx r10, r7, v4 +; CHECK-NEXT: plxv v4, .LCPI0_5@PCREL(0), 1 +; CHECK-NEXT: rlwimi r4, r10, 14, 17, 17 +; CHECK-NEXT: rlwimi r4, r9, 15, 0, 16 +; CHECK-NEXT: vcmpequw v4, v3, v4 +; CHECK-NEXT: vextubrx r10, r8, v4 +; CHECK-NEXT: vextubrx r9, r6, v4 +; CHECK-NEXT: clrlwi r10, r10, 31 +; CHECK-NEXT: rlwimi r10, r9, 1, 30, 30 +; CHECK-NEXT: vextubrx r9, r7, v4 +; CHECK-NEXT: rlwimi r10, r9, 2, 29, 29 +; CHECK-NEXT: vextubrx r9, r5, v4 +; CHECK-NEXT: plxv v4, .LCPI0_6@PCREL(0), 1 +; CHECK-NEXT: rlwimi r10, r9, 3, 28, 28 +; CHECK-NEXT: vcmpequw v4, v3, v4 +; CHECK-NEXT: vextubrx r9, r8, v4 +; CHECK-NEXT: rlwimi r10, r9, 4, 27, 27 +; CHECK-NEXT: vextubrx r9, r6, v4 +; CHECK-NEXT: rlwimi r10, r9, 5, 26, 26 +; CHECK-NEXT: vextubrx r9, r7, v4 +; CHECK-NEXT: rlwimi r10, r9, 6, 25, 25 +; CHECK-NEXT: vextubrx r9, r5, v4 +; CHECK-NEXT: plxv v4, .LCPI0_7@PCREL(0), 1 +; CHECK-NEXT: rlwimi r10, r9, 7, 24, 24 +; CHECK-NEXT: vcmpequw v3, v3, v4 ; CHECK-NEXT: vextubrx r9, r8, v3 -; CHECK-NEXT: rlwimi r4, r9, 12, 19, 19 -; CHECK-NEXT: vextubrx r9, r5, v3 -; CHECK-NEXT: rlwimi r4, r9, 13, 18, 18 +; CHECK-NEXT: vextubrx r5, r5, v3 +; CHECK-NEXT: rlwimi r10, r9, 8, 23, 23 ; CHECK-NEXT: vextubrx r9, r6, v3 -; CHECK-NEXT: rlwimi r4, r9, 14, 17, 17 +; CHECK-NEXT: rlwimi r10, r9, 9, 22, 22 ; CHECK-NEXT: vextubrx r9, r7, v3 -; CHECK-NEXT: plxv v3, .LCPI0_4@PCREL(0), 1 -; CHECK-NEXT: rlwimi r4, r9, 15, 0, 16 -; CHECK-NEXT: vcmpequw v3, v2, v3 -; CHECK-NEXT: vextubrx r10, r5, v3 -; CHECK-NEXT: vextubrx r9, r8, v3 -; CHECK-NEXT: rlwimi r9, r10, 1, 30, 30 -; CHECK-NEXT: vextubrx r10, r6, v3 -; CHECK-NEXT: rlwimi r9, r10, 2, 29, 29 -; CHECK-NEXT: vextubrx r10, r7, v3 -; CHECK-NEXT: plxv v3, .LCPI0_5@PCREL(0), 1 -; CHECK-NEXT: rlwimi r9, r10, 3, 28, 28 -; CHECK-NEXT: vcmpequw v3, v2, v3 -; CHECK-NEXT: vextubrx r10, r8, v3 -; CHECK-NEXT: rlwimi r9, r10, 4, 27, 27 -; CHECK-NEXT: 
vextubrx r10, r5, v3 -; CHECK-NEXT: rlwimi r9, r10, 5, 26, 26 -; CHECK-NEXT: vextubrx r10, r6, v3 -; CHECK-NEXT: rlwimi r9, r10, 6, 25, 25 -; CHECK-NEXT: vextubrx r10, r7, v3 -; CHECK-NEXT: plxv v3, .LCPI0_6@PCREL(0), 1 -; CHECK-NEXT: rlwimi r9, r10, 7, 24, 24 -; CHECK-NEXT: vcmpequw v3, v2, v3 -; CHECK-NEXT: vextubrx r10, r8, v3 -; CHECK-NEXT: rlwimi r9, r10, 8, 23, 23 -; CHECK-NEXT: vextubrx r10, r5, v3 -; CHECK-NEXT: rlwimi r9, r10, 9, 22, 22 -; CHECK-NEXT: vextubrx r10, r6, v3 -; CHECK-NEXT: rlwimi r9, r10, 10, 21, 21 -; CHECK-NEXT: vextubrx r10, r7, v3 -; CHECK-NEXT: plxv v3, .LCPI0_7@PCREL(0), 1 -; CHECK-NEXT: rlwimi r9, r10, 11, 20, 20 -; CHECK-NEXT: vcmpequw v2, v2, v3 -; CHECK-NEXT: vextubrx r8, r8, v2 -; CHECK-NEXT: vextubrx r5, r5, v2 -; CHECK-NEXT: rlwimi r9, r8, 12, 19, 19 -; CHECK-NEXT: rlwimi r9, r5, 13, 18, 18 +; CHECK-NEXT: rlwimi r10, r9, 10, 21, 21 +; CHECK-NEXT: rlwimi r10, r5, 11, 20, 20 +; CHECK-NEXT: vextubrx r5, r8, v2 +; CHECK-NEXT: rlwimi r10, r5, 12, 19, 19 ; CHECK-NEXT: vextubrx r5, r6, v2 -; CHECK-NEXT: rlwimi r9, r5, 14, 17, 17 +; CHECK-NEXT: rlwimi r10, r5, 13, 18, 18 ; CHECK-NEXT: vextubrx r5, r7, v2 -; CHECK-NEXT: rlwimi r9, r5, 15, 0, 16 -; CHECK-NEXT: or r4, r9, r4 +; CHECK-NEXT: rlwimi r10, r5, 14, 17, 17 +; CHECK-NEXT: or r4, r4, r10 ; CHECK-NEXT: andi. r4, r4, 65535 ; CHECK-NEXT: iseleq r3, 0, r3 ; CHECK-NEXT: blr diff --git a/llvm/test/CodeGen/X86/bswap_tree2.ll b/llvm/test/CodeGen/X86/bswap_tree2.ll --- a/llvm/test/CodeGen/X86/bswap_tree2.ll +++ b/llvm/test/CodeGen/X86/bswap_tree2.ll @@ -10,29 +10,23 @@ ; CHECK-LABEL: test1: ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: andl $16711680, %ecx # imm = 0xFF0000 -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: orl $-16777216, %edx # imm = 0xFF000000 -; CHECK-NEXT: shll $8, %ecx -; CHECK-NEXT: shrl $8, %edx -; CHECK-NEXT: orl %ecx, %edx -; CHECK-NEXT: bswapl %eax -; CHECK-NEXT: shrl $16, %eax -; CHECK-NEXT: orl %edx, %eax +; CHECK-NEXT: movzwl %ax, %ecx +; CHECK-NEXT: orl %eax, %ecx +; CHECK-NEXT: orl $-16777216, %ecx # imm = 0xFF000000 +; CHECK-NEXT: shrl $8, %ecx +; CHECK-NEXT: andl $16711935, %eax # imm = 0xFF00FF +; CHECK-NEXT: shll $8, %eax +; CHECK-NEXT: orl %ecx, %eax ; CHECK-NEXT: retl ; ; CHECK64-LABEL: test1: ; CHECK64: # %bb.0: -; CHECK64-NEXT: movl %edi, %ecx -; CHECK64-NEXT: andl $16711680, %ecx # imm = 0xFF0000 -; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: movzwl %di, %eax +; CHECK64-NEXT: orl %edi, %eax ; CHECK64-NEXT: orl $-16777216, %eax # imm = 0xFF000000 -; CHECK64-NEXT: shll $8, %ecx ; CHECK64-NEXT: shrl $8, %eax -; CHECK64-NEXT: orl %ecx, %eax -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: shrl $16, %edi +; CHECK64-NEXT: andl $16711935, %edi # imm = 0xFF00FF +; CHECK64-NEXT: shll $8, %edi ; CHECK64-NEXT: orl %edi, %eax ; CHECK64-NEXT: retq %byte0 = and i32 %x, 255 ; 0x000000ff diff --git a/llvm/test/CodeGen/X86/shift-combine.ll b/llvm/test/CodeGen/X86/shift-combine.ll --- a/llvm/test/CodeGen/X86/shift-combine.ll +++ b/llvm/test/CodeGen/X86/shift-combine.ll @@ -506,3 +506,199 @@ %conv1 = ashr <4 x i32> %sext, ret <4 x i32> %conv1 } + +define i32 @or_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) { +; X32-LABEL: or_tree_with_shifts_i32: +; X32: # %bb.0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: orl {{[0-9]+}}(%esp), %eax +; X32-NEXT: shll $16, %eax +; X32-NEXT: orl {{[0-9]+}}(%esp), %eax +; X32-NEXT: orl {{[0-9]+}}(%esp), %eax +; X32-NEXT: retl +; +; X64-LABEL: or_tree_with_shifts_i32: +; X64: # %bb.0: +; 
X64-NEXT: movl %esi, %eax +; X64-NEXT: orl %edx, %edi +; X64-NEXT: shll $16, %edi +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: orl %edi, %eax +; X64-NEXT: retq + %a.shifted = shl i32 %a, 16 + %c.shifted = shl i32 %c, 16 + %or.ab = or i32 %a.shifted, %b + %or.cd = or i32 %c.shifted, %d + %r = or i32 %or.ab, %or.cd + ret i32 %r +} + +define i32 @xor_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) { +; X32-LABEL: xor_tree_with_shifts_i32: +; X32: # %bb.0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X32-NEXT: shrl $16, %eax +; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X32-NEXT: retl +; +; X64-LABEL: xor_tree_with_shifts_i32: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %eax +; X64-NEXT: xorl %edx, %edi +; X64-NEXT: shrl $16, %edi +; X64-NEXT: xorl %ecx, %eax +; X64-NEXT: xorl %edi, %eax +; X64-NEXT: retq + %a.shifted = lshr i32 %a, 16 + %c.shifted = lshr i32 %c, 16 + %xor.ab = xor i32 %a.shifted, %b + %xor.cd = xor i32 %c.shifted, %d + %r = xor i32 %xor.ab, %xor.cd + ret i32 %r +} + +define i32 @and_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) { +; X32-LABEL: and_tree_with_shifts_i32: +; X32: # %bb.0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X32-NEXT: sarl $16, %eax +; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X32-NEXT: retl +; +; X64-LABEL: and_tree_with_shifts_i32: +; X64: # %bb.0: +; X64-NEXT: movl %esi, %eax +; X64-NEXT: xorl %edx, %edi +; X64-NEXT: sarl $16, %edi +; X64-NEXT: xorl %ecx, %eax +; X64-NEXT: xorl %edi, %eax +; X64-NEXT: retq + %a.shifted = ashr i32 %a, 16 + %c.shifted = ashr i32 %c, 16 + %and.ab = xor i32 %a.shifted, %b + %and.cd = xor i32 %c.shifted, %d + %r = xor i32 %and.ab, %and.cd + ret i32 %r +} + +define i32 @logic_tree_with_shifts_var_i32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %s) { +; X32-LABEL: logic_tree_with_shifts_var_i32: +; X32: # %bb.0: +; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: orl {{[0-9]+}}(%esp), %eax +; X32-NEXT: shll %cl, %eax +; X32-NEXT: orl {{[0-9]+}}(%esp), %eax +; X32-NEXT: orl {{[0-9]+}}(%esp), %eax +; X32-NEXT: retl +; +; X64-LABEL: logic_tree_with_shifts_var_i32: +; X64: # %bb.0: +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: orl %edx, %edi +; X64-NEXT: movl %r8d, %ecx +; X64-NEXT: shll %cl, %edi +; X64-NEXT: orl %esi, %eax +; X64-NEXT: orl %edi, %eax +; X64-NEXT: retq + %a.shifted = shl i32 %a, %s + %c.shifted = shl i32 %c, %s + %or.ab = or i32 %a.shifted, %b + %or.cd = or i32 %c.shifted, %d + %r = or i32 %or.ab, %or.cd + ret i32 %r +} + +define i32 @logic_tree_with_mismatching_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) { +; X32-LABEL: logic_tree_with_mismatching_shifts_i32: +; X32: # %bb.0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: shll $15, %ecx +; X32-NEXT: shll $16, %eax +; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: orl {{[0-9]+}}(%esp), %eax +; X32-NEXT: orl %ecx, %eax +; X32-NEXT: retl +; +; X64-LABEL: logic_tree_with_mismatching_shifts_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edx, %eax +; X64-NEXT: shll $15, %edi +; X64-NEXT: shll $16, %eax +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: orl %esi, %eax +; X64-NEXT: orl %edi, %eax +; X64-NEXT: retq + %a.shifted = shl i32 %a, 15 + %c.shifted = shl i32 %c, 16 + %or.ab = or i32 %a.shifted, %b + %or.cd = or i32 %c.shifted, %d + %r = or i32 %or.ab, %or.cd + ret i32 %r +} + +define i32 @logic_tree_with_mismatching_shifts2_i32(i32 
%a, i32 %b, i32 %c, i32 %d) {
+; X32-LABEL: logic_tree_with_mismatching_shifts2_i32:
+; X32: # %bb.0:
+; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: shll $16, %ecx
+; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl %ecx, %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: logic_tree_with_mismatching_shifts2_i32:
+; X64: # %bb.0:
+; X64-NEXT: movl %edx, %eax
+; X64-NEXT: shll $16, %edi
+; X64-NEXT: shrl $16, %eax
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: orl %esi, %eax
+; X64-NEXT: orl %edi, %eax
+; X64-NEXT: retq
+ %a.shifted = shl i32 %a, 16
+ %c.shifted = lshr i32 %c, 16
+ %or.ab = or i32 %a.shifted, %b
+ %or.cd = or i32 %c.shifted, %d
+ %r = or i32 %or.ab, %or.cd
+ ret i32 %r
+}
+
+define <4 x i32> @or_tree_with_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
+; X64-LABEL: or_tree_with_shifts_vec_i32:
+; X64: # %bb.0:
+; X64-NEXT: por %xmm2, %xmm0
+; X64-NEXT: pslld $16, %xmm0
+; X64-NEXT: por %xmm3, %xmm1
+; X64-NEXT: por %xmm1, %xmm0
+; X64-NEXT: retq
+ %a.shifted = shl <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
+ %c.shifted = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
+ %or.ab = or <4 x i32> %a.shifted, %b
+ %or.cd = or <4 x i32> %c.shifted, %d
+ %r = or <4 x i32> %or.ab, %or.cd
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @or_tree_with_mismatching_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
+; X64-LABEL: or_tree_with_mismatching_shifts_vec_i32:
+; X64: # %bb.0:
+; X64-NEXT: pslld $16, %xmm0
+; X64-NEXT: pslld $17, %xmm2
+; X64-NEXT: por %xmm3, %xmm2
+; X64-NEXT: por %xmm1, %xmm2
+; X64-NEXT: por %xmm2, %xmm0
+; X64-NEXT: retq
+ %a.shifted = shl <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
+ %c.shifted = shl <4 x i32> %c, <i32 17, i32 17, i32 17, i32 17>
+ %or.ab = or <4 x i32> %a.shifted, %b
+ %or.cd = or <4 x i32> %c.shifted, %d
+ %r = or <4 x i32> %or.ab, %or.cd
+ ret <4 x i32> %r
+}
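
Reviewer note (not part of the patch): the new foldLogicTreeOfShifts combine relies on shifts by a common amount distributing over AND/OR/XOR, i.e.
  LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) == LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W).
The standalone C++ sketch below brute-forces that identity on a few sample values. It is illustrative only: the file name and variable names are made up, it uses no LLVM APIs, and it only checks the scalar bitwise algebra the DAG transform depends on.

// identity_check.cpp -- hypothetical standalone helper, not part of this patch.
// Brute-forces the identity the combine relies on:
//   ((x0 SH s) OP z) OP ((x1 SH s) OP w) == ((x0 OP x1) SH s) OP (z OP w)
// for OP in {|, ^, &} and SH a left or logical right shift by the same amount s.
#include <cassert>
#include <cstdint>

int main() {
  const std::uint32_t Samples[] = {0u, 1u, 0xFFFFu, 0xDEADBEEFu, 0x80000001u};
  for (std::uint32_t X0 : Samples)
    for (std::uint32_t X1 : Samples)
      for (std::uint32_t Z : Samples)
        for (std::uint32_t W : Samples)
          for (unsigned S : {1u, 15u, 16u, 31u}) {
            // OR tree with a common left shift.
            assert((((X0 << S) | Z) | ((X1 << S) | W)) ==
                   (((X0 | X1) << S) | (Z | W)));
            // XOR tree with a common logical right shift.
            assert((((X0 >> S) ^ Z) ^ ((X1 >> S) ^ W)) ==
                   (((X0 ^ X1) >> S) ^ (Z ^ W)));
            // AND tree with a common left shift.
            assert((((X0 << S) & Z) & ((X1 << S) & W)) ==
                   (((X0 & X1) << S) & (Z & W)));
          }
  return 0;
}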