diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6152,6 +6152,49 @@
   return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
 }
 
+/// Given a tree of bitwise logic operations that all share one opcode,
+///   LOGIC (LOGIC A, B), (LOGIC C, D)
+/// try to match and fold shift operations with the same shift amount that
+/// sit in different subtrees. For example:
+///   LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
+///     --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
+///
+/// \p N is the root AND/OR/XOR node. Returns an empty SDValue unless
+/// \p LeftHand and \p RightHand both have the opcode of \p N, each has exactly
+/// one use, and foldLogicOfShifts succeeds on \p LeftHand together with one
+/// operand of \p RightHand.
+static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
+                                     SDValue RightHand, SelectionDAG &DAG) {
+  unsigned LogicOpcode = N->getOpcode();
+  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
+          LogicOpcode == ISD::XOR) && "Expected bitwise logic operation");
+  if (LeftHand.getOpcode() != LogicOpcode ||
+      RightHand.getOpcode() != LogicOpcode)
+    return SDValue();
+  // Bail out unless each inner logic node has exactly one use.
+  if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
+    return SDValue();
+
+  // Try to match one of the following patterns:
+  //   LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
+  //   LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
+  // Note that foldLogicOfShifts itself handles the commuted versions of the
+  // left-hand side.
+  SDValue CombinedShifts, W;
+  SDValue R0 = RightHand.getOperand(0);
+  SDValue R1 = RightHand.getOperand(1);
+  if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
+    W = R1;
+  else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
+    W = R0;
+  else
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+  return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
+}
+
 SDValue DAGCombiner::visitAND(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -6524,6 +6567,12 @@
   if (SDValue V = foldAndToUsubsat(N, DAG))
     return V;
 
+  // Postpone until legalization is complete (vectors excepted) to avoid
+  // interfering with bswap folding.
+  if (LegalOperations || VT.isVector())
+    if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
+      return R;
+
   return SDValue();
 }
 
@@ -7124,6 +7173,12 @@
   if (SDValue Combined = visitADDLike(N))
     return Combined;
 
+  // Postpone until legalization is complete (vectors excepted) to avoid
+  // interfering with bswap folding.
+  if (LegalOperations || VT.isVector())
+    if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
+      return R;
+
   return SDValue();
 }
 
@@ -8608,6 +8663,8 @@
     return R;
   if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
     return R;
+  if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
+    return R;
 
   // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
   if (SDValue MM = unfoldMaskedMerge(N))
diff --git a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll
--- a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll
+++ b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll
@@ -139,15 +139,13 @@
 define i1 @opt_setcc_shl_ne_zero_i128(i128 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_shl_ne_zero_i128:
 ; CHECK: @ %bb.0:
-; CHECK-NEXT: lsl r3, r3, #17
-; CHECK-NEXT: orr r12, r3, r2, lsr #15
-; CHECK-NEXT: lsl r3, r1, #17
-; CHECK-NEXT: orr r3, r3, r0, lsr #15
+; CHECK-NEXT: orr r3, r1, r3
 ; CHECK-NEXT: orr r0, r2, r0
-; CHECK-NEXT: orr r3, r3, r12
-; CHECK-NEXT: lsl r0, r0, #17
-; CHECK-NEXT: orr r0, r0, r1, lsr
#15 -; CHECK-NEXT: orrs r0, r0, r3 +; CHECK-NEXT: orr r2, r0, r3 +; CHECK-NEXT: orr r0, r0, r1 +; CHECK-NEXT: lsr r0, r0, #15 +; CHECK-NEXT: orr r0, r0, r2, lsl #17 +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: movwne r0, #1 ; CHECK-NEXT: bx lr %shl = shl i128 %a, 17 diff --git a/llvm/test/CodeGen/ARM/shift-combine.ll b/llvm/test/CodeGen/ARM/shift-combine.ll --- a/llvm/test/CodeGen/ARM/shift-combine.ll +++ b/llvm/test/CodeGen/ARM/shift-combine.ll @@ -900,12 +900,11 @@ ; CHECK-ARM-NEXT: .save {r11, lr} ; CHECK-ARM-NEXT: push {r11, lr} ; CHECK-ARM-NEXT: ldr lr, [sp, #16] -; CHECK-ARM-NEXT: lsl r3, r3, #16 -; CHECK-ARM-NEXT: ldr r12, [sp, #8] -; CHECK-ARM-NEXT: orr r3, r3, r2, lsr #16 ; CHECK-ARM-NEXT: orr r0, r0, r2, lsl #16 -; CHECK-ARM-NEXT: orr r1, r1, lr, lsl #16 -; CHECK-ARM-NEXT: orr r1, r1, r3 +; CHECK-ARM-NEXT: ldr r12, [sp, #8] +; CHECK-ARM-NEXT: orr r3, lr, r3 +; CHECK-ARM-NEXT: orr r1, r1, r3, lsl #16 +; CHECK-ARM-NEXT: orr r1, r1, r2, lsr #16 ; CHECK-ARM-NEXT: orr r1, r1, r12 ; CHECK-ARM-NEXT: pop {r11, pc} ; @@ -914,41 +913,38 @@ ; CHECK-BE-NEXT: .save {r11, lr} ; CHECK-BE-NEXT: push {r11, lr} ; CHECK-BE-NEXT: ldr lr, [sp, #20] -; CHECK-BE-NEXT: lsl r2, r2, #16 -; CHECK-BE-NEXT: ldr r12, [sp, #12] -; CHECK-BE-NEXT: orr r2, r2, r3, lsr #16 ; CHECK-BE-NEXT: orr r1, r1, r3, lsl #16 -; CHECK-BE-NEXT: orr r0, r0, lr, lsl #16 -; CHECK-BE-NEXT: orr r0, r0, r2 +; CHECK-BE-NEXT: ldr r12, [sp, #12] +; CHECK-BE-NEXT: orr r2, lr, r2 +; CHECK-BE-NEXT: orr r0, r0, r2, lsl #16 +; CHECK-BE-NEXT: orr r0, r0, r3, lsr #16 ; CHECK-BE-NEXT: orr r0, r0, r12 ; CHECK-BE-NEXT: pop {r11, pc} ; ; CHECK-ALIGN-LABEL: or_tree_with_shifts_i64: ; CHECK-ALIGN: @ %bb.0: ; CHECK-ALIGN-NEXT: ldr.w r12, [sp, #8] -; CHECK-ALIGN-NEXT: lsls r3, r3, #16 -; CHECK-ALIGN-NEXT: orr.w r3, r3, r2, lsr #16 ; CHECK-ALIGN-NEXT: orr.w r0, r0, r2, lsl #16 -; CHECK-ALIGN-NEXT: orr.w r1, r1, r12, lsl #16 -; CHECK-ALIGN-NEXT: orrs r1, r3 -; CHECK-ALIGN-NEXT: ldr r3, [sp] -; CHECK-ALIGN-NEXT: orrs r1, r3 +; 
CHECK-ALIGN-NEXT: orr.w r3, r3, r12 +; CHECK-ALIGN-NEXT: orr.w r1, r1, r3, lsl #16 +; CHECK-ALIGN-NEXT: orr.w r1, r1, r2, lsr #16 +; CHECK-ALIGN-NEXT: ldr r2, [sp] +; CHECK-ALIGN-NEXT: orrs r1, r2 ; CHECK-ALIGN-NEXT: bx lr ; ; CHECK-V6M-LABEL: or_tree_with_shifts_i64: ; CHECK-V6M: @ %bb.0: ; CHECK-V6M-NEXT: push {r4, lr} -; CHECK-V6M-NEXT: lsrs r4, r2, #16 -; CHECK-V6M-NEXT: lsls r3, r3, #16 -; CHECK-V6M-NEXT: adds r3, r3, r4 +; CHECK-V6M-NEXT: lsls r4, r2, #16 +; CHECK-V6M-NEXT: orrs r0, r4 ; CHECK-V6M-NEXT: ldr r4, [sp, #16] -; CHECK-V6M-NEXT: lsls r4, r4, #16 -; CHECK-V6M-NEXT: orrs r1, r4 +; CHECK-V6M-NEXT: orrs r4, r3 +; CHECK-V6M-NEXT: lsls r3, r4, #16 ; CHECK-V6M-NEXT: orrs r1, r3 -; CHECK-V6M-NEXT: ldr r3, [sp, #8] -; CHECK-V6M-NEXT: orrs r1, r3 -; CHECK-V6M-NEXT: lsls r2, r2, #16 -; CHECK-V6M-NEXT: orrs r0, r2 +; CHECK-V6M-NEXT: lsrs r2, r2, #16 +; CHECK-V6M-NEXT: orrs r1, r2 +; CHECK-V6M-NEXT: ldr r2, [sp, #8] +; CHECK-V6M-NEXT: orrs r1, r2 ; CHECK-V6M-NEXT: pop {r4, pc} %b.shifted = shl i64 %b, 16 %c.shifted = shl i64 %c, 32 @@ -962,39 +958,38 @@ define i32 @or_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-ARM-LABEL: or_tree_with_shifts_i32: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: orr r2, r3, r2, lsl #16 -; CHECK-ARM-NEXT: orr r0, r1, r0, lsl #16 ; CHECK-ARM-NEXT: orr r0, r0, r2 +; CHECK-ARM-NEXT: orr r0, r1, r0, lsl #16 +; CHECK-ARM-NEXT: orr r0, r0, r3 ; CHECK-ARM-NEXT: bx lr ; ; CHECK-BE-LABEL: or_tree_with_shifts_i32: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: orr r2, r3, r2, lsl #16 -; CHECK-BE-NEXT: orr r0, r1, r0, lsl #16 ; CHECK-BE-NEXT: orr r0, r0, r2 +; CHECK-BE-NEXT: orr r0, r1, r0, lsl #16 +; CHECK-BE-NEXT: orr r0, r0, r3 ; CHECK-BE-NEXT: bx lr ; ; CHECK-THUMB-LABEL: or_tree_with_shifts_i32: ; CHECK-THUMB: @ %bb.0: -; CHECK-THUMB-NEXT: orr.w r2, r3, r2, lsl #16 -; CHECK-THUMB-NEXT: orr.w r0, r1, r0, lsl #16 ; CHECK-THUMB-NEXT: orrs r0, r2 +; CHECK-THUMB-NEXT: orr.w r0, r1, r0, lsl #16 +; CHECK-THUMB-NEXT: orrs r0, r3 ; 
CHECK-THUMB-NEXT: bx lr ; ; CHECK-ALIGN-LABEL: or_tree_with_shifts_i32: ; CHECK-ALIGN: @ %bb.0: -; CHECK-ALIGN-NEXT: orr.w r2, r3, r2, lsl #16 -; CHECK-ALIGN-NEXT: orr.w r0, r1, r0, lsl #16 ; CHECK-ALIGN-NEXT: orrs r0, r2 +; CHECK-ALIGN-NEXT: orr.w r0, r1, r0, lsl #16 +; CHECK-ALIGN-NEXT: orrs r0, r3 ; CHECK-ALIGN-NEXT: bx lr ; ; CHECK-V6M-LABEL: or_tree_with_shifts_i32: ; CHECK-V6M: @ %bb.0: -; CHECK-V6M-NEXT: lsls r2, r2, #16 -; CHECK-V6M-NEXT: orrs r2, r3 +; CHECK-V6M-NEXT: orrs r0, r2 ; CHECK-V6M-NEXT: lsls r0, r0, #16 ; CHECK-V6M-NEXT: orrs r0, r1 -; CHECK-V6M-NEXT: orrs r0, r2 +; CHECK-V6M-NEXT: orrs r0, r3 ; CHECK-V6M-NEXT: bx lr %a.shifted = shl i32 %a, 16 %c.shifted = shl i32 %c, 16 @@ -1007,39 +1002,38 @@ define i32 @xor_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-ARM-LABEL: xor_tree_with_shifts_i32: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: eor r2, r3, r2, lsr #16 -; CHECK-ARM-NEXT: eor r0, r1, r0, lsr #16 ; CHECK-ARM-NEXT: eor r0, r0, r2 +; CHECK-ARM-NEXT: eor r0, r1, r0, lsr #16 +; CHECK-ARM-NEXT: eor r0, r0, r3 ; CHECK-ARM-NEXT: bx lr ; ; CHECK-BE-LABEL: xor_tree_with_shifts_i32: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: eor r2, r3, r2, lsr #16 -; CHECK-BE-NEXT: eor r0, r1, r0, lsr #16 ; CHECK-BE-NEXT: eor r0, r0, r2 +; CHECK-BE-NEXT: eor r0, r1, r0, lsr #16 +; CHECK-BE-NEXT: eor r0, r0, r3 ; CHECK-BE-NEXT: bx lr ; ; CHECK-THUMB-LABEL: xor_tree_with_shifts_i32: ; CHECK-THUMB: @ %bb.0: -; CHECK-THUMB-NEXT: eor.w r2, r3, r2, lsr #16 -; CHECK-THUMB-NEXT: eor.w r0, r1, r0, lsr #16 ; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: eor.w r0, r1, r0, lsr #16 +; CHECK-THUMB-NEXT: eors r0, r3 ; CHECK-THUMB-NEXT: bx lr ; ; CHECK-ALIGN-LABEL: xor_tree_with_shifts_i32: ; CHECK-ALIGN: @ %bb.0: -; CHECK-ALIGN-NEXT: eor.w r2, r3, r2, lsr #16 -; CHECK-ALIGN-NEXT: eor.w r0, r1, r0, lsr #16 ; CHECK-ALIGN-NEXT: eors r0, r2 +; CHECK-ALIGN-NEXT: eor.w r0, r1, r0, lsr #16 +; CHECK-ALIGN-NEXT: eors r0, r3 ; CHECK-ALIGN-NEXT: bx lr ; ; CHECK-V6M-LABEL: 
xor_tree_with_shifts_i32: ; CHECK-V6M: @ %bb.0: -; CHECK-V6M-NEXT: lsrs r2, r2, #16 -; CHECK-V6M-NEXT: eors r2, r3 +; CHECK-V6M-NEXT: eors r0, r2 ; CHECK-V6M-NEXT: lsrs r0, r0, #16 ; CHECK-V6M-NEXT: eors r0, r1 -; CHECK-V6M-NEXT: eors r0, r2 +; CHECK-V6M-NEXT: eors r0, r3 ; CHECK-V6M-NEXT: bx lr %a.shifted = lshr i32 %a, 16 %c.shifted = lshr i32 %c, 16 @@ -1052,39 +1046,38 @@ define i32 @and_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-ARM-LABEL: and_tree_with_shifts_i32: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: and r2, r3, r2, asr #16 -; CHECK-ARM-NEXT: and r0, r1, r0, asr #16 ; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: and r0, r1, r0, asr #16 +; CHECK-ARM-NEXT: and r0, r0, r3 ; CHECK-ARM-NEXT: bx lr ; ; CHECK-BE-LABEL: and_tree_with_shifts_i32: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: and r2, r3, r2, asr #16 -; CHECK-BE-NEXT: and r0, r1, r0, asr #16 ; CHECK-BE-NEXT: and r0, r0, r2 +; CHECK-BE-NEXT: and r0, r1, r0, asr #16 +; CHECK-BE-NEXT: and r0, r0, r3 ; CHECK-BE-NEXT: bx lr ; ; CHECK-THUMB-LABEL: and_tree_with_shifts_i32: ; CHECK-THUMB: @ %bb.0: -; CHECK-THUMB-NEXT: and.w r2, r3, r2, asr #16 -; CHECK-THUMB-NEXT: and.w r0, r1, r0, asr #16 ; CHECK-THUMB-NEXT: ands r0, r2 +; CHECK-THUMB-NEXT: and.w r0, r1, r0, asr #16 +; CHECK-THUMB-NEXT: ands r0, r3 ; CHECK-THUMB-NEXT: bx lr ; ; CHECK-ALIGN-LABEL: and_tree_with_shifts_i32: ; CHECK-ALIGN: @ %bb.0: -; CHECK-ALIGN-NEXT: and.w r2, r3, r2, asr #16 -; CHECK-ALIGN-NEXT: and.w r0, r1, r0, asr #16 ; CHECK-ALIGN-NEXT: ands r0, r2 +; CHECK-ALIGN-NEXT: and.w r0, r1, r0, asr #16 +; CHECK-ALIGN-NEXT: ands r0, r3 ; CHECK-ALIGN-NEXT: bx lr ; ; CHECK-V6M-LABEL: and_tree_with_shifts_i32: ; CHECK-V6M: @ %bb.0: -; CHECK-V6M-NEXT: asrs r2, r2, #16 -; CHECK-V6M-NEXT: ands r2, r3 +; CHECK-V6M-NEXT: ands r0, r2 ; CHECK-V6M-NEXT: asrs r0, r0, #16 ; CHECK-V6M-NEXT: ands r0, r1 -; CHECK-V6M-NEXT: ands r0, r2 +; CHECK-V6M-NEXT: ands r0, r3 ; CHECK-V6M-NEXT: bx lr %a.shifted = ashr i32 %a, 16 %c.shifted = ashr i32 
%c, 16 @@ -1098,49 +1091,36 @@ ; CHECK-ARM-LABEL: logic_tree_with_shifts_var_i32: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: ldr r12, [sp] -; CHECK-ARM-NEXT: orr r2, r3, r2, lsl r12 -; CHECK-ARM-NEXT: orr r0, r1, r0, lsl r12 ; CHECK-ARM-NEXT: orr r0, r0, r2 +; CHECK-ARM-NEXT: orr r0, r1, r0, lsl r12 +; CHECK-ARM-NEXT: orr r0, r0, r3 ; CHECK-ARM-NEXT: bx lr ; ; CHECK-BE-LABEL: logic_tree_with_shifts_var_i32: ; CHECK-BE: @ %bb.0: ; CHECK-BE-NEXT: ldr r12, [sp] -; CHECK-BE-NEXT: orr r2, r3, r2, lsl r12 -; CHECK-BE-NEXT: orr r0, r1, r0, lsl r12 ; CHECK-BE-NEXT: orr r0, r0, r2 +; CHECK-BE-NEXT: orr r0, r1, r0, lsl r12 +; CHECK-BE-NEXT: orr r0, r0, r3 ; CHECK-BE-NEXT: bx lr ; -; CHECK-THUMB-LABEL: logic_tree_with_shifts_var_i32: -; CHECK-THUMB: @ %bb.0: -; CHECK-THUMB-NEXT: ldr.w r12, [sp] -; CHECK-THUMB-NEXT: lsl.w r2, r2, r12 -; CHECK-THUMB-NEXT: lsl.w r0, r0, r12 -; CHECK-THUMB-NEXT: orrs r2, r3 -; CHECK-THUMB-NEXT: orrs r0, r1 -; CHECK-THUMB-NEXT: orrs r0, r2 -; CHECK-THUMB-NEXT: bx lr -; ; CHECK-ALIGN-LABEL: logic_tree_with_shifts_var_i32: ; CHECK-ALIGN: @ %bb.0: -; CHECK-ALIGN-NEXT: ldr.w r12, [sp] -; CHECK-ALIGN-NEXT: lsl.w r2, r2, r12 -; CHECK-ALIGN-NEXT: lsl.w r0, r0, r12 -; CHECK-ALIGN-NEXT: orrs r2, r3 -; CHECK-ALIGN-NEXT: orrs r0, r1 ; CHECK-ALIGN-NEXT: orrs r0, r2 +; CHECK-ALIGN-NEXT: ldr r2, [sp] +; CHECK-ALIGN-NEXT: lsls r0, r2 +; CHECK-ALIGN-NEXT: orrs r0, r1 +; CHECK-ALIGN-NEXT: orrs r0, r3 ; CHECK-ALIGN-NEXT: bx lr ; ; CHECK-V6M-LABEL: logic_tree_with_shifts_var_i32: ; CHECK-V6M: @ %bb.0: -; CHECK-V6M-NEXT: push {r4, lr} -; CHECK-V6M-NEXT: ldr r4, [sp, #8] -; CHECK-V6M-NEXT: lsls r2, r4 -; CHECK-V6M-NEXT: orrs r2, r3 -; CHECK-V6M-NEXT: lsls r0, r4 -; CHECK-V6M-NEXT: orrs r0, r1 ; CHECK-V6M-NEXT: orrs r0, r2 -; CHECK-V6M-NEXT: pop {r4, pc} +; CHECK-V6M-NEXT: ldr r2, [sp] +; CHECK-V6M-NEXT: lsls r0, r2 +; CHECK-V6M-NEXT: orrs r0, r1 +; CHECK-V6M-NEXT: orrs r0, r3 +; CHECK-V6M-NEXT: bx lr %a.shifted = shl i32 %a, %s %c.shifted = shl i32 %c, %s %or.ab = or i32 
%a.shifted, %b @@ -1242,24 +1222,22 @@ define <4 x i32> @or_tree_with_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) { ; CHECK-ARM-LABEL: or_tree_with_shifts_vec_i32: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: vshl.i32 q8, q2, #16 -; CHECK-ARM-NEXT: vshl.i32 q9, q0, #16 -; CHECK-ARM-NEXT: vorr q8, q8, q3 -; CHECK-ARM-NEXT: vorr q9, q9, q1 -; CHECK-ARM-NEXT: vorr q0, q9, q8 +; CHECK-ARM-NEXT: vorr q8, q0, q2 +; CHECK-ARM-NEXT: vshl.i32 q8, q8, #16 +; CHECK-ARM-NEXT: vorr q8, q8, q1 +; CHECK-ARM-NEXT: vorr q0, q8, q3 ; CHECK-ARM-NEXT: bx lr ; ; CHECK-BE-LABEL: or_tree_with_shifts_vec_i32: ; CHECK-BE: @ %bb.0: ; CHECK-BE-NEXT: vrev64.32 q8, q2 ; CHECK-BE-NEXT: vrev64.32 q9, q0 -; CHECK-BE-NEXT: vshl.i32 q8, q8, #16 +; CHECK-BE-NEXT: vorr q8, q9, q8 +; CHECK-BE-NEXT: vrev64.32 q9, q1 ; CHECK-BE-NEXT: vrev64.32 q10, q3 -; CHECK-BE-NEXT: vshl.i32 q9, q9, #16 -; CHECK-BE-NEXT: vrev64.32 q11, q1 +; CHECK-BE-NEXT: vshl.i32 q8, q8, #16 +; CHECK-BE-NEXT: vorr q8, q8, q9 ; CHECK-BE-NEXT: vorr q8, q8, q10 -; CHECK-BE-NEXT: vorr q9, q9, q11 -; CHECK-BE-NEXT: vorr q8, q9, q8 ; CHECK-BE-NEXT: vrev64.32 q0, q8 ; CHECK-BE-NEXT: bx lr %a.shifted = shl <4 x i32> %a, diff --git a/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll b/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll --- a/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll +++ b/llvm/test/CodeGen/PowerPC/p10-handle-split-promote-vec.ll @@ -8,90 +8,91 @@ define i32 @SplitPromoteVectorTest(i32 %Opc) align 2 { ; CHECK-LABEL: SplitPromoteVectorTest: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plxv v3, .LCPI0_0@PCREL(0), 1 -; CHECK-NEXT: mtvsrws v2, r3 -; CHECK-NEXT: li r5, 4 +; CHECK-NEXT: plxv v2, .LCPI0_0@PCREL(0), 1 +; CHECK-NEXT: plxv v4, .LCPI0_1@PCREL(0), 1 +; CHECK-NEXT: mtvsrws v3, r3 +; CHECK-NEXT: li r5, 12 ; CHECK-NEXT: li r8, 0 -; CHECK-NEXT: vcmpequw v3, v2, v3 -; CHECK-NEXT: vextubrx r6, r5, v3 -; CHECK-NEXT: vextubrx r4, r8, v3 -; CHECK-NEXT: rlwimi r4, r6, 1, 30, 30 -; 
CHECK-NEXT: li r6, 8 -; CHECK-NEXT: vextubrx r7, r6, v3 -; CHECK-NEXT: rlwimi r4, r7, 2, 29, 29 -; CHECK-NEXT: li r7, 12 -; CHECK-NEXT: vextubrx r9, r7, v3 -; CHECK-NEXT: plxv v3, .LCPI0_1@PCREL(0), 1 -; CHECK-NEXT: rlwimi r4, r9, 3, 28, 28 -; CHECK-NEXT: vcmpequw v3, v2, v3 -; CHECK-NEXT: vextubrx r9, r8, v3 -; CHECK-NEXT: rlwimi r4, r9, 4, 27, 27 -; CHECK-NEXT: vextubrx r9, r5, v3 -; CHECK-NEXT: rlwimi r4, r9, 5, 26, 26 -; CHECK-NEXT: vextubrx r9, r6, v3 -; CHECK-NEXT: rlwimi r4, r9, 6, 25, 25 -; CHECK-NEXT: vextubrx r9, r7, v3 -; CHECK-NEXT: plxv v3, .LCPI0_2@PCREL(0), 1 -; CHECK-NEXT: rlwimi r4, r9, 7, 24, 24 -; CHECK-NEXT: vcmpequw v3, v2, v3 -; CHECK-NEXT: vextubrx r9, r8, v3 -; CHECK-NEXT: rlwimi r4, r9, 8, 23, 23 -; CHECK-NEXT: vextubrx r9, r5, v3 -; CHECK-NEXT: rlwimi r4, r9, 9, 22, 22 -; CHECK-NEXT: vextubrx r9, r6, v3 -; CHECK-NEXT: rlwimi r4, r9, 10, 21, 21 -; CHECK-NEXT: vextubrx r9, r7, v3 -; CHECK-NEXT: plxv v3, .LCPI0_3@PCREL(0), 1 -; CHECK-NEXT: rlwimi r4, r9, 11, 20, 20 -; CHECK-NEXT: vcmpequw v3, v2, v3 +; CHECK-NEXT: vcmpequw v2, v3, v2 +; CHECK-NEXT: plxv v5, .LCPI0_2@PCREL(0), 1 +; CHECK-NEXT: vcmpequw v4, v3, v4 +; CHECK-NEXT: vcmpequw v5, v3, v5 +; CHECK-NEXT: vextubrx r4, r5, v2 +; CHECK-NEXT: vextubrx r6, r5, v4 +; CHECK-NEXT: or r9, r6, r4 +; CHECK-NEXT: li r6, 4 +; CHECK-NEXT: vextubrx r4, r8, v5 +; CHECK-NEXT: vextubrx r7, r6, v5 +; CHECK-NEXT: rlwimi r4, r7, 1, 30, 30 +; CHECK-NEXT: li r7, 8 +; CHECK-NEXT: vextubrx r10, r7, v5 +; CHECK-NEXT: rlwimi r4, r10, 2, 29, 29 +; CHECK-NEXT: vextubrx r10, r5, v5 +; CHECK-NEXT: plxv v5, .LCPI0_3@PCREL(0), 1 +; CHECK-NEXT: rlwimi r4, r10, 3, 28, 28 +; CHECK-NEXT: vcmpequw v5, v3, v5 +; CHECK-NEXT: vextubrx r10, r8, v5 +; CHECK-NEXT: rlwimi r4, r10, 4, 27, 27 +; CHECK-NEXT: vextubrx r10, r6, v5 +; CHECK-NEXT: rlwimi r4, r10, 5, 26, 26 +; CHECK-NEXT: vextubrx r10, r7, v5 +; CHECK-NEXT: rlwimi r4, r10, 6, 25, 25 +; CHECK-NEXT: vextubrx r10, r5, v5 +; CHECK-NEXT: plxv v5, .LCPI0_4@PCREL(0), 1 +; 
CHECK-NEXT: rlwimi r4, r10, 7, 24, 24 +; CHECK-NEXT: vcmpequw v5, v3, v5 +; CHECK-NEXT: vextubrx r10, r8, v5 +; CHECK-NEXT: rlwimi r4, r10, 8, 23, 23 +; CHECK-NEXT: vextubrx r10, r6, v5 +; CHECK-NEXT: rlwimi r4, r10, 9, 22, 22 +; CHECK-NEXT: vextubrx r10, r7, v5 +; CHECK-NEXT: rlwimi r4, r10, 10, 21, 21 +; CHECK-NEXT: vextubrx r10, r5, v5 +; CHECK-NEXT: rlwimi r4, r10, 11, 20, 20 +; CHECK-NEXT: vextubrx r10, r8, v4 +; CHECK-NEXT: rlwimi r4, r10, 12, 19, 19 +; CHECK-NEXT: vextubrx r10, r6, v4 +; CHECK-NEXT: rlwimi r4, r10, 13, 18, 18 +; CHECK-NEXT: vextubrx r10, r7, v4 +; CHECK-NEXT: plxv v4, .LCPI0_5@PCREL(0), 1 +; CHECK-NEXT: rlwimi r4, r10, 14, 17, 17 +; CHECK-NEXT: rlwimi r4, r9, 15, 0, 16 +; CHECK-NEXT: vcmpequw v4, v3, v4 +; CHECK-NEXT: vextubrx r10, r8, v4 +; CHECK-NEXT: vextubrx r9, r6, v4 +; CHECK-NEXT: clrlwi r10, r10, 31 +; CHECK-NEXT: rlwimi r10, r9, 1, 30, 30 +; CHECK-NEXT: vextubrx r9, r7, v4 +; CHECK-NEXT: rlwimi r10, r9, 2, 29, 29 +; CHECK-NEXT: vextubrx r9, r5, v4 +; CHECK-NEXT: plxv v4, .LCPI0_6@PCREL(0), 1 +; CHECK-NEXT: rlwimi r10, r9, 3, 28, 28 +; CHECK-NEXT: vcmpequw v4, v3, v4 +; CHECK-NEXT: vextubrx r9, r8, v4 +; CHECK-NEXT: rlwimi r10, r9, 4, 27, 27 +; CHECK-NEXT: vextubrx r9, r6, v4 +; CHECK-NEXT: rlwimi r10, r9, 5, 26, 26 +; CHECK-NEXT: vextubrx r9, r7, v4 +; CHECK-NEXT: rlwimi r10, r9, 6, 25, 25 +; CHECK-NEXT: vextubrx r9, r5, v4 +; CHECK-NEXT: plxv v4, .LCPI0_7@PCREL(0), 1 +; CHECK-NEXT: rlwimi r10, r9, 7, 24, 24 +; CHECK-NEXT: vcmpequw v3, v3, v4 ; CHECK-NEXT: vextubrx r9, r8, v3 -; CHECK-NEXT: rlwimi r4, r9, 12, 19, 19 -; CHECK-NEXT: vextubrx r9, r5, v3 -; CHECK-NEXT: rlwimi r4, r9, 13, 18, 18 +; CHECK-NEXT: vextubrx r5, r5, v3 +; CHECK-NEXT: rlwimi r10, r9, 8, 23, 23 ; CHECK-NEXT: vextubrx r9, r6, v3 -; CHECK-NEXT: rlwimi r4, r9, 14, 17, 17 +; CHECK-NEXT: rlwimi r10, r9, 9, 22, 22 ; CHECK-NEXT: vextubrx r9, r7, v3 -; CHECK-NEXT: plxv v3, .LCPI0_4@PCREL(0), 1 -; CHECK-NEXT: rlwimi r4, r9, 15, 0, 16 -; CHECK-NEXT: vcmpequw v3, v2, v3 -; 
CHECK-NEXT: vextubrx r10, r5, v3 -; CHECK-NEXT: vextubrx r9, r8, v3 -; CHECK-NEXT: rlwimi r9, r10, 1, 30, 30 -; CHECK-NEXT: vextubrx r10, r6, v3 -; CHECK-NEXT: rlwimi r9, r10, 2, 29, 29 -; CHECK-NEXT: vextubrx r10, r7, v3 -; CHECK-NEXT: plxv v3, .LCPI0_5@PCREL(0), 1 -; CHECK-NEXT: rlwimi r9, r10, 3, 28, 28 -; CHECK-NEXT: vcmpequw v3, v2, v3 -; CHECK-NEXT: vextubrx r10, r8, v3 -; CHECK-NEXT: rlwimi r9, r10, 4, 27, 27 -; CHECK-NEXT: vextubrx r10, r5, v3 -; CHECK-NEXT: rlwimi r9, r10, 5, 26, 26 -; CHECK-NEXT: vextubrx r10, r6, v3 -; CHECK-NEXT: rlwimi r9, r10, 6, 25, 25 -; CHECK-NEXT: vextubrx r10, r7, v3 -; CHECK-NEXT: plxv v3, .LCPI0_6@PCREL(0), 1 -; CHECK-NEXT: rlwimi r9, r10, 7, 24, 24 -; CHECK-NEXT: vcmpequw v3, v2, v3 -; CHECK-NEXT: vextubrx r10, r8, v3 -; CHECK-NEXT: rlwimi r9, r10, 8, 23, 23 -; CHECK-NEXT: vextubrx r10, r5, v3 -; CHECK-NEXT: rlwimi r9, r10, 9, 22, 22 -; CHECK-NEXT: vextubrx r10, r6, v3 -; CHECK-NEXT: rlwimi r9, r10, 10, 21, 21 -; CHECK-NEXT: vextubrx r10, r7, v3 -; CHECK-NEXT: plxv v3, .LCPI0_7@PCREL(0), 1 -; CHECK-NEXT: rlwimi r9, r10, 11, 20, 20 -; CHECK-NEXT: vcmpequw v2, v2, v3 -; CHECK-NEXT: vextubrx r8, r8, v2 -; CHECK-NEXT: vextubrx r5, r5, v2 -; CHECK-NEXT: rlwimi r9, r8, 12, 19, 19 -; CHECK-NEXT: rlwimi r9, r5, 13, 18, 18 +; CHECK-NEXT: rlwimi r10, r9, 10, 21, 21 +; CHECK-NEXT: rlwimi r10, r5, 11, 20, 20 +; CHECK-NEXT: vextubrx r5, r8, v2 +; CHECK-NEXT: rlwimi r10, r5, 12, 19, 19 ; CHECK-NEXT: vextubrx r5, r6, v2 -; CHECK-NEXT: rlwimi r9, r5, 14, 17, 17 +; CHECK-NEXT: rlwimi r10, r5, 13, 18, 18 ; CHECK-NEXT: vextubrx r5, r7, v2 -; CHECK-NEXT: rlwimi r9, r5, 15, 0, 16 -; CHECK-NEXT: or r4, r9, r4 +; CHECK-NEXT: rlwimi r10, r5, 14, 17, 17 +; CHECK-NEXT: or r4, r4, r10 ; CHECK-NEXT: andi. 
r4, r4, 65535 ; CHECK-NEXT: iseleq r3, 0, r3 ; CHECK-NEXT: blr diff --git a/llvm/test/CodeGen/X86/bswap_tree2.ll b/llvm/test/CodeGen/X86/bswap_tree2.ll --- a/llvm/test/CodeGen/X86/bswap_tree2.ll +++ b/llvm/test/CodeGen/X86/bswap_tree2.ll @@ -10,29 +10,23 @@ ; CHECK-LABEL: test1: ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: andl $16711680, %ecx # imm = 0xFF0000 -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: orl $-16777216, %edx # imm = 0xFF000000 -; CHECK-NEXT: shll $8, %ecx -; CHECK-NEXT: shrl $8, %edx -; CHECK-NEXT: orl %ecx, %edx -; CHECK-NEXT: bswapl %eax -; CHECK-NEXT: shrl $16, %eax -; CHECK-NEXT: orl %edx, %eax +; CHECK-NEXT: movzwl %ax, %ecx +; CHECK-NEXT: orl %eax, %ecx +; CHECK-NEXT: orl $-16777216, %ecx # imm = 0xFF000000 +; CHECK-NEXT: shrl $8, %ecx +; CHECK-NEXT: andl $16711935, %eax # imm = 0xFF00FF +; CHECK-NEXT: shll $8, %eax +; CHECK-NEXT: orl %ecx, %eax ; CHECK-NEXT: retl ; ; CHECK64-LABEL: test1: ; CHECK64: # %bb.0: -; CHECK64-NEXT: movl %edi, %ecx -; CHECK64-NEXT: andl $16711680, %ecx # imm = 0xFF0000 -; CHECK64-NEXT: movl %edi, %eax +; CHECK64-NEXT: movzwl %di, %eax +; CHECK64-NEXT: orl %edi, %eax ; CHECK64-NEXT: orl $-16777216, %eax # imm = 0xFF000000 -; CHECK64-NEXT: shll $8, %ecx ; CHECK64-NEXT: shrl $8, %eax -; CHECK64-NEXT: orl %ecx, %eax -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: shrl $16, %edi +; CHECK64-NEXT: andl $16711935, %edi # imm = 0xFF00FF +; CHECK64-NEXT: shll $8, %edi ; CHECK64-NEXT: orl %edi, %eax ; CHECK64-NEXT: retq %byte0 = and i32 %x, 255 ; 0x000000ff diff --git a/llvm/test/CodeGen/X86/shift-combine.ll b/llvm/test/CodeGen/X86/shift-combine.ll --- a/llvm/test/CodeGen/X86/shift-combine.ll +++ b/llvm/test/CodeGen/X86/shift-combine.ll @@ -511,21 +511,18 @@ ; X32-LABEL: or_tree_with_shifts_i32: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: shll $16, %ecx +; X32-NEXT: orl {{[0-9]+}}(%esp), %eax ; 
X32-NEXT: shll $16, %eax -; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: orl {{[0-9]+}}(%esp), %eax -; X32-NEXT: orl %ecx, %eax +; X32-NEXT: orl {{[0-9]+}}(%esp), %eax ; X32-NEXT: retl ; ; X64-LABEL: or_tree_with_shifts_i32: ; X64: # %bb.0: -; X64-NEXT: movl %edx, %eax +; X64-NEXT: movl %esi, %eax +; X64-NEXT: orl %edx, %edi ; X64-NEXT: shll $16, %edi -; X64-NEXT: shll $16, %eax ; X64-NEXT: orl %ecx, %eax -; X64-NEXT: orl %esi, %eax ; X64-NEXT: orl %edi, %eax ; X64-NEXT: retq %a.shifted = shl i32 %a, 16 @@ -539,20 +536,19 @@ define i32 @xor_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) { ; X32-LABEL: xor_tree_with_shifts_i32: ; X32: # %bb.0: -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X32-NEXT: shrl $16, %eax +; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax ; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax -; X32-NEXT: xorl %ecx, %eax ; X32-NEXT: retl ; ; X64-LABEL: xor_tree_with_shifts_i32: ; X64: # %bb.0: -; X64-NEXT: movl %edx, %eax +; X64-NEXT: movl %esi, %eax +; X64-NEXT: xorl %edx, %edi ; X64-NEXT: shrl $16, %edi -; X64-NEXT: shrl $16, %eax ; X64-NEXT: xorl %ecx, %eax -; X64-NEXT: xorl %esi, %eax ; X64-NEXT: xorl %edi, %eax ; X64-NEXT: retq %a.shifted = lshr i32 %a, 16 @@ -575,11 +571,10 @@ ; ; X64-LABEL: and_tree_with_shifts_i32: ; X64: # %bb.0: -; X64-NEXT: movl %edx, %eax +; X64-NEXT: movl %esi, %eax +; X64-NEXT: andl %edx, %edi ; X64-NEXT: sarl $16, %edi -; X64-NEXT: sarl $16, %eax ; X64-NEXT: andl %ecx, %eax -; X64-NEXT: andl %esi, %eax ; X64-NEXT: andl %edi, %eax ; X64-NEXT: retq %a.shifted = ashr i32 %a, 16 @@ -593,23 +588,20 @@ define i32 @logic_tree_with_shifts_var_i32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %s) { ; X32-LABEL: logic_tree_with_shifts_var_i32: ; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx 
-; X32-NEXT: shll %cl, %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: orl {{[0-9]+}}(%esp), %eax ; X32-NEXT: shll %cl, %eax -; X32-NEXT: orl {{[0-9]+}}(%esp), %edx ; X32-NEXT: orl {{[0-9]+}}(%esp), %eax -; X32-NEXT: orl %edx, %eax +; X32-NEXT: orl {{[0-9]+}}(%esp), %eax ; X32-NEXT: retl ; ; X64-LABEL: logic_tree_with_shifts_var_i32: ; X64: # %bb.0: ; X64-NEXT: movl %ecx, %eax +; X64-NEXT: orl %edx, %edi ; X64-NEXT: movl %r8d, %ecx ; X64-NEXT: shll %cl, %edi -; X64-NEXT: shll %cl, %edx -; X64-NEXT: orl %edx, %eax ; X64-NEXT: orl %esi, %eax ; X64-NEXT: orl %edi, %eax ; X64-NEXT: retq @@ -681,11 +673,10 @@ define <4 x i32> @or_tree_with_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) { ; X64-LABEL: or_tree_with_shifts_vec_i32: ; X64: # %bb.0: -; X64-NEXT: pslld $16, %xmm0 -; X64-NEXT: pslld $16, %xmm2 -; X64-NEXT: por %xmm3, %xmm2 -; X64-NEXT: por %xmm1, %xmm2 ; X64-NEXT: por %xmm2, %xmm0 +; X64-NEXT: pslld $16, %xmm0 +; X64-NEXT: por %xmm3, %xmm1 +; X64-NEXT: por %xmm1, %xmm0 ; X64-NEXT: retq %a.shifted = shl <4 x i32> %a, %c.shifted = shl <4 x i32> %c,