diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -4216,18 +4216,55 @@ void DAGTypeLegalizer::ExpandIntRes_Rotate(SDNode *N, SDValue &Lo, SDValue &Hi) { - // Lower the rotate to shifts and ORs which can be expanded. - SDValue Res; - TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG); + // Delegate to funnel-shift expansion. + SDLoc dl(N); + unsigned Opcode = N->getOpcode() == ISD::ROTL ? ISD::FSHL : ISD::FSHR; + SDValue Res = DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(0), + N->getOperand(0), N->getOperand(1)); SplitInteger(Res, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo, SDValue &Hi) { - // Lower the funnel shift to shifts and ORs which can be expanded. - SDValue Res; - TLI.expandFunnelShift(N, Res, DAG); - SplitInteger(Res, Lo, Hi); + SDLoc dl(N); + // Values numbered from least significant to most significant. + SDValue In1, In2, In3, In4; + GetExpandedInteger(N->getOperand(0), In3, In4); + GetExpandedInteger(N->getOperand(1), In1, In2); + EVT HalfVT = In1.getValueType(); + + SDValue ShAmt = N->getOperand(2); + EVT ShAmtVT = ShAmt.getValueType(); + EVT ShAmtCCVT = getSetCCResultType(ShAmtVT); + + // If the shift amount is at least half the bitwidth, swap the inputs + unsigned HalfVTBits = HalfVT.getScalarSizeInBits(); + SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt, + DAG.getConstant(HalfVTBits, dl, ShAmtVT)); + SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode, + DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE); + + // Expand to a pair of funnel shifts. + EVT NewShAmtVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout()); + SDValue NewShAmt = DAG.getAnyExtOrTrunc(ShAmt, dl, NewShAmtVT); + + if (N->getOpcode() == ISD::FSHL) { + SDValue Select2, Select3, Select4; + Select4 = DAG.getNode(ISD::SELECT, dl, HalfVT, Cond, In3, In4); + Select3 = DAG.getNode(ISD::SELECT, dl, HalfVT, Cond, In2, In3); + Select2 = DAG.getNode(ISD::SELECT, dl, HalfVT, Cond, In1, In2); + + Lo = DAG.getNode(ISD::FSHL, dl, HalfVT, Select3, Select2, NewShAmt); + Hi = DAG.getNode(ISD::FSHL, dl, HalfVT, Select4, Select3, NewShAmt); + } else { + SDValue Select1, Select2, Select3; + Select3 = DAG.getNode(ISD::SELECT, dl, HalfVT, Cond, In4, In3); + Select2 = DAG.getNode(ISD::SELECT, dl, HalfVT, Cond, In3, In2); + Select1 = DAG.getNode(ISD::SELECT, dl, HalfVT, Cond, In2, In1); + + Lo = DAG.getNode(ISD::FSHR, dl, HalfVT, Select2, Select1, NewShAmt); + Hi = DAG.getNode(ISD::FSHR, dl, HalfVT, Select3, Select2, NewShAmt); + } } void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo, diff --git a/llvm/test/CodeGen/ARM/funnel-shift-rot.ll b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll --- a/llvm/test/CodeGen/ARM/funnel-shift-rot.ll +++ b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll @@ -67,61 +67,24 @@ } define i64 @rotl_i64(i64 %x, i64 %z) { -; SCALAR-LABEL: rotl_i64: -; SCALAR: @ %bb.0: -; SCALAR-NEXT: .save {r4, r5, r11, lr} -; SCALAR-NEXT: push {r4, r5, r11, lr} -; SCALAR-NEXT: rsb r3, r2, #0 -; SCALAR-NEXT: and r4, r2, #63 -; SCALAR-NEXT: and lr, r3, #63 -; SCALAR-NEXT: rsb r3, lr, #32 -; SCALAR-NEXT: lsl r2, r0, r4 -; SCALAR-NEXT: lsr r12, r0, lr -; SCALAR-NEXT: orr r3, r12, r1, lsl r3 -; SCALAR-NEXT: subs r12, lr, #32 -; SCALAR-NEXT: lsrpl r3, r1, r12 -; SCALAR-NEXT: subs r5, r4, #32 -; SCALAR-NEXT: movwpl r2, #0 -; SCALAR-NEXT: cmp r5, #0 -; SCALAR-NEXT: orr r2, r2, r3 -; SCALAR-NEXT: rsb r3, r4, #32 -; SCALAR-NEXT: lsr r3, r0, r3 -; SCALAR-NEXT: orr r3, r3, r1, lsl r4 -; SCALAR-NEXT: lslpl r3, r0, r5 -; SCALAR-NEXT: lsr r0, r1, lr -; SCALAR-NEXT: cmp r12, #0 -; SCALAR-NEXT: movwpl r0, #0 -; SCALAR-NEXT: orr r1, r3, r0 -; SCALAR-NEXT: mov r0, r2 -; SCALAR-NEXT: pop {r4, r5, r11, pc} -; -; NEON-LABEL: rotl_i64: -; NEON: @ %bb.0: -; NEON-NEXT: .save {r4, r5, r11, lr} -; NEON-NEXT: push {r4, r5, r11, lr} -; NEON-NEXT: and r12, r2, #63 -; NEON-NEXT: rsb r2, r2, #0 -; NEON-NEXT: rsb r3, r12, #32 -; NEON-NEXT: and r4, r2, #63 -; NEON-NEXT: subs lr, r12, #32 -; NEON-NEXT: lsr r3, r0, r3 -; NEON-NEXT: lsr r2, r1, r4 -; NEON-NEXT: orr r3, r3, r1, lsl r12 -; NEON-NEXT: lslpl r3, r0, lr -; NEON-NEXT: subs r5, r4, #32 -; NEON-NEXT: movwpl r2, #0 -; NEON-NEXT: cmp r5, #0 -; NEON-NEXT: orr r2, r3, r2 -; NEON-NEXT: lsr r3, r0, r4 -; NEON-NEXT: rsb r4, r4, #32 -; NEON-NEXT: lsl r0, r0, r12 -; NEON-NEXT: orr r3, r3, r1, lsl r4 -; NEON-NEXT: lsrpl r3, r1, r5 -; NEON-NEXT: cmp lr, #0 -; NEON-NEXT: movwpl r0, #0 -; NEON-NEXT: mov r1, r2 -; NEON-NEXT: orr r0, r0, r3 -; NEON-NEXT: pop {r4, r5, r11, pc} +; CHECK-LABEL: rotl_i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: ands r3, r2, #32 +; CHECK-NEXT: and r12, r2, #31 +; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: mov r4, #31 +; CHECK-NEXT: movne r3, r1 +; CHECK-NEXT: movne r1, r0 +; CHECK-NEXT: bic r2, r4, r2 +; CHECK-NEXT: lsl lr, r3, r12 +; CHECK-NEXT: lsr r0, r1, #1 +; CHECK-NEXT: lsl r1, r1, r12 +; CHECK-NEXT: lsr r3, r3, #1 +; CHECK-NEXT: orr r0, lr, r0, lsr r2 +; CHECK-NEXT: orr r1, r1, r3, lsr r2 +; CHECK-NEXT: pop {r4, pc} %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z) ret i64 %f } @@ -243,31 +206,22 @@ define i64 @rotr_i64(i64 %x, i64 %z) { ; CHECK-LABEL: rotr_i64: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r11, lr} -; CHECK-NEXT: push {r4, r5, r11, lr} -; CHECK-NEXT: and lr, r2, #63 -; CHECK-NEXT: rsb r2, r2, #0 -; CHECK-NEXT: rsb r3, lr, #32 -; CHECK-NEXT: and r4, r2, #63 -; CHECK-NEXT: lsr r12, r0, lr -; CHECK-NEXT: orr r3, r12, r1, lsl r3 -; CHECK-NEXT: subs r12, lr, #32 -; CHECK-NEXT: lsl r2, r0, r4 -; CHECK-NEXT: lsrpl r3, r1, r12 -; CHECK-NEXT: subs r5, r4, #32 -; CHECK-NEXT: movwpl r2, #0 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: orr r2, r3, r2 -; CHECK-NEXT: rsb r3, r4, #32 -; CHECK-NEXT: lsr r3, r0, r3 -; CHECK-NEXT: orr r3, r3, r1, lsl r4 -; CHECK-NEXT: lslpl r3, r0, r5 -; CHECK-NEXT: lsr r0, r1, lr -; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: movwpl r0, #0 -; CHECK-NEXT: orr r1, r0, r3 -; CHECK-NEXT: mov r0, r2 -; CHECK-NEXT: pop {r4, r5, r11, pc} +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: ands r3, r2, #32 +; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: movne r3, r1 +; CHECK-NEXT: movne r1, r0 +; CHECK-NEXT: mov r0, #31 +; CHECK-NEXT: bic lr, r0, r2 +; CHECK-NEXT: lsl r12, r1, #1 +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: lsl r0, r12, lr +; CHECK-NEXT: orr r0, r0, r3, lsr r2 +; CHECK-NEXT: lsl r3, r3, #1 +; CHECK-NEXT: lsl r3, r3, lr +; CHECK-NEXT: orr r1, r3, r1, lsr r2 +; CHECK-NEXT: pop {r11, pc} %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z) ret i64 %f } diff --git a/llvm/test/CodeGen/ARM/funnel-shift.ll b/llvm/test/CodeGen/ARM/funnel-shift.ll --- a/llvm/test/CodeGen/ARM/funnel-shift.ll +++ b/llvm/test/CodeGen/ARM/funnel-shift.ll @@ -45,46 +45,69 @@ ; Verify that weird types are minimally supported. declare i37 @llvm.fshl.i37(i37, i37, i37) define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { -; CHECK-LABEL: fshl_i37: -; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: ldr r0, [sp, #24] -; CHECK-NEXT: mov r6, r3 -; CHECK-NEXT: ldr r1, [sp, #28] -; CHECK-NEXT: mov r7, r2 -; CHECK-NEXT: mov r2, #37 -; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: bl __aeabi_uldivmod -; CHECK-NEXT: mov r0, #63 -; CHECK-NEXT: bic r1, r0, r2 -; CHECK-NEXT: lsl r0, r6, #27 -; CHECK-NEXT: lsl r3, r7, #27 -; CHECK-NEXT: orr r0, r0, r7, lsr #5 -; CHECK-NEXT: and r2, r2, #63 -; CHECK-NEXT: lsrs r7, r0, #1 -; CHECK-NEXT: rrx r0, r3 -; CHECK-NEXT: rsb r3, r1, #32 -; CHECK-NEXT: lsr r0, r0, r1 -; CHECK-NEXT: lsl r6, r4, r2 -; CHECK-NEXT: orr r0, r0, r7, lsl r3 -; CHECK-NEXT: subs r3, r1, #32 -; CHECK-NEXT: lsr r1, r7, r1 -; CHECK-NEXT: lsrpl r0, r7, r3 -; CHECK-NEXT: subs r5, r2, #32 -; CHECK-NEXT: movwpl r6, #0 -; CHECK-NEXT: orr r0, r6, r0 -; CHECK-NEXT: rsb r6, r2, #32 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: lsr r6, r4, r6 -; CHECK-NEXT: orr r2, r6, r8, lsl r2 -; CHECK-NEXT: lslpl r2, r4, r5 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movwpl r1, #0 -; CHECK-NEXT: orr r1, r2, r1 -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc} +; SCALAR-LABEL: fshl_i37: +; SCALAR: @ %bb.0: +; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, lr} +; SCALAR-NEXT: push {r4, r5, r6, r7, r8, lr} +; SCALAR-NEXT: mov r6, r1 +; SCALAR-NEXT: mov r8, r0 +; SCALAR-NEXT: ldr r0, [sp, #24] +; SCALAR-NEXT: mov r4, r3 +; SCALAR-NEXT: ldr r1, [sp, #28] +; SCALAR-NEXT: mov r5, r2 +; SCALAR-NEXT: mov r2, #37 +; SCALAR-NEXT: mov r3, #0 +; SCALAR-NEXT: bl __aeabi_uldivmod +; SCALAR-NEXT: lsl r1, r4, #27 +; SCALAR-NEXT: ands r12, r2, #32 +; SCALAR-NEXT: orr r1, r1, r5, lsr #5 +; SCALAR-NEXT: mov r3, r8 +; SCALAR-NEXT: and r4, r2, #31 +; SCALAR-NEXT: mov r0, #31 +; SCALAR-NEXT: movne r3, r1 +; SCALAR-NEXT: cmp r12, #0 +; SCALAR-NEXT: bic r2, r0, r2 +; SCALAR-NEXT: lslne r1, r5, #27 +; SCALAR-NEXT: movne r6, r8 +; SCALAR-NEXT: lsl r7, r3, r4 +; SCALAR-NEXT: lsr r0, r1, #1 +; SCALAR-NEXT: lsl r1, r6, r4 +; SCALAR-NEXT: lsr r3, r3, #1 +; SCALAR-NEXT: orr r0, r7, r0, lsr r2 +; SCALAR-NEXT: orr r1, r1, r3, lsr r2 +; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, pc} +; +; NEON-LABEL: fshl_i37: +; NEON: @ %bb.0: +; NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} +; NEON-NEXT: push {r4, r5, r6, r7, r11, lr} +; NEON-NEXT: mov r6, r1 +; NEON-NEXT: mov r4, r0 +; NEON-NEXT: ldr r0, [sp, #24] +; NEON-NEXT: mov r7, r3 +; NEON-NEXT: ldr r1, [sp, #28] +; NEON-NEXT: mov r5, r2 +; NEON-NEXT: mov r2, #37 +; NEON-NEXT: mov r3, #0 +; NEON-NEXT: bl __aeabi_uldivmod +; NEON-NEXT: mov r0, #31 +; NEON-NEXT: bic r1, r0, r2 +; NEON-NEXT: lsl r0, r7, #27 +; NEON-NEXT: ands r12, r2, #32 +; NEON-NEXT: orr r0, r0, r5, lsr #5 +; NEON-NEXT: mov r7, r4 +; NEON-NEXT: and r2, r2, #31 +; NEON-NEXT: movne r7, r0 +; NEON-NEXT: lslne r0, r5, #27 +; NEON-NEXT: cmp r12, #0 +; NEON-NEXT: lsl r3, r7, r2 +; NEON-NEXT: lsr r0, r0, #1 +; NEON-NEXT: movne r6, r4 +; NEON-NEXT: orr r0, r3, r0, lsr r1 +; NEON-NEXT: lsr r3, r7, #1 +; NEON-NEXT: lsl r2, r6, r2 +; NEON-NEXT: orr r1, r2, r3, lsr r1 +; NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z) ret i37 %f } @@ -157,8 +180,8 @@ define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) { ; CHECK-LABEL: fshl_i64_const_overshift: ; CHECK: @ %bb.0: -; CHECK-NEXT: lsr r1, r2, #23 -; CHECK-NEXT: orr r2, r1, r3, lsl #9 +; CHECK-NEXT: lsl r1, r3, #9 +; CHECK-NEXT: orr r2, r1, r2, lsr #23 ; CHECK-NEXT: lsl r0, r0, #9 ; CHECK-NEXT: orr r1, r0, r3, lsr #23 ; CHECK-NEXT: mov r0, r2 @@ -212,46 +235,36 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { ; CHECK-LABEL: fshr_i37: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: ldr r0, [sp, #32] -; CHECK-NEXT: mov r6, r3 -; CHECK-NEXT: ldr r1, [sp, #36] -; CHECK-NEXT: mov r7, r2 +; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: ldr r0, [sp, #24] +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: ldr r1, [sp, #28] +; CHECK-NEXT: mov r5, r2 ; CHECK-NEXT: mov r2, #37 ; CHECK-NEXT: mov r3, #0 ; CHECK-NEXT: bl __aeabi_uldivmod +; CHECK-NEXT: lsl r3, r4, #27 ; CHECK-NEXT: add r0, r2, #27 -; CHECK-NEXT: lsl r6, r6, #27 -; CHECK-NEXT: and r1, r0, #63 -; CHECK-NEXT: lsl r2, r7, #27 -; CHECK-NEXT: orr r7, r6, r7, lsr #5 -; CHECK-NEXT: mov r6, #63 -; CHECK-NEXT: rsb r3, r1, #32 -; CHECK-NEXT: lsr r2, r2, r1 -; CHECK-NEXT: subs r12, r1, #32 -; CHECK-NEXT: bic r6, r6, r0 -; CHECK-NEXT: orr r2, r2, r7, lsl r3 -; CHECK-NEXT: lsl r5, r9, #1 -; CHECK-NEXT: lsrpl r2, r7, r12 -; CHECK-NEXT: lsl r0, r5, r6 -; CHECK-NEXT: subs r4, r6, #32 -; CHECK-NEXT: lsl r3, r8, #1 -; CHECK-NEXT: movwpl r0, #0 -; CHECK-NEXT: orr r3, r3, r9, lsr #31 -; CHECK-NEXT: orr r0, r0, r2 -; CHECK-NEXT: rsb r2, r6, #32 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: lsr r1, r7, r1 -; CHECK-NEXT: lsr r2, r5, r2 -; CHECK-NEXT: orr r2, r2, r3, lsl r6 -; CHECK-NEXT: lslpl r2, r5, r4 +; CHECK-NEXT: orr r3, r3, r5, lsr #5 +; CHECK-NEXT: mov r1, #31 +; CHECK-NEXT: ands r12, r0, #32 +; CHECK-NEXT: bic r1, r1, r0 +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: lsleq r3, r5, #27 +; CHECK-NEXT: movne r4, r7 ; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: movwpl r1, #0 -; CHECK-NEXT: orr r1, r2, r1 -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-NEXT: lsl r2, r4, #1 +; CHECK-NEXT: and r5, r0, #31 +; CHECK-NEXT: lsl r2, r2, r1 +; CHECK-NEXT: moveq r6, r7 +; CHECK-NEXT: orr r0, r2, r3, lsr r5 +; CHECK-NEXT: lsl r2, r6, #1 +; CHECK-NEXT: lsl r1, r2, r1 +; CHECK-NEXT: orr r1, r1, r4, lsr r5 +; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc} %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z) ret i37 %f } diff --git a/llvm/test/CodeGen/Mips/funnel-shift-rot.ll b/llvm/test/CodeGen/Mips/funnel-shift-rot.ll --- a/llvm/test/CodeGen/Mips/funnel-shift-rot.ll +++ b/llvm/test/CodeGen/Mips/funnel-shift-rot.ll @@ -76,59 +76,43 @@ define i64 @rotl_i64(i64 %x, i64 %z) { ; CHECK-BE-LABEL: rotl_i64: ; CHECK-BE: # %bb.0: -; CHECK-BE-NEXT: negu $1, $7 -; CHECK-BE-NEXT: andi $3, $1, 63 -; CHECK-BE-NEXT: srlv $6, $4, $3 -; CHECK-BE-NEXT: andi $1, $1, 32 -; CHECK-BE-NEXT: andi $2, $7, 63 -; CHECK-BE-NEXT: move $8, $6 -; CHECK-BE-NEXT: movn $8, $zero, $1 -; CHECK-BE-NEXT: sllv $9, $4, $2 -; CHECK-BE-NEXT: srl $10, $5, 1 -; CHECK-BE-NEXT: not $11, $2 -; CHECK-BE-NEXT: srlv $10, $10, $11 -; CHECK-BE-NEXT: or $9, $9, $10 -; CHECK-BE-NEXT: sllv $10, $5, $2 -; CHECK-BE-NEXT: andi $7, $7, 32 -; CHECK-BE-NEXT: movn $9, $10, $7 -; CHECK-BE-NEXT: or $2, $9, $8 -; CHECK-BE-NEXT: srlv $5, $5, $3 -; CHECK-BE-NEXT: not $3, $3 -; CHECK-BE-NEXT: sll $4, $4, 1 -; CHECK-BE-NEXT: sllv $3, $4, $3 -; CHECK-BE-NEXT: or $3, $3, $5 -; CHECK-BE-NEXT: movn $3, $6, $1 -; CHECK-BE-NEXT: movn $10, $zero, $7 +; CHECK-BE-NEXT: srl $1, $7, 5 +; CHECK-BE-NEXT: andi $1, $1, 1 +; CHECK-BE-NEXT: move $3, $4 +; CHECK-BE-NEXT: movn $3, $5, $1 +; CHECK-BE-NEXT: andi $6, $7, 31 +; CHECK-BE-NEXT: sllv $2, $3, $6 +; CHECK-BE-NEXT: movn $5, $4, $1 +; CHECK-BE-NEXT: srl $1, $5, 1 +; CHECK-BE-NEXT: not $4, $7 +; CHECK-BE-NEXT: andi $4, $4, 31 +; CHECK-BE-NEXT: srlv $1, $1, $4 +; CHECK-BE-NEXT: or $2, $2, $1 +; CHECK-BE-NEXT: sllv $1, $5, $6 +; CHECK-BE-NEXT: srl $3, $3, 1 +; CHECK-BE-NEXT: srlv $3, $3, $4 ; CHECK-BE-NEXT: jr $ra -; CHECK-BE-NEXT: or $3, $10, $3 +; CHECK-BE-NEXT: or $3, $1, $3 ; ; CHECK-LE-LABEL: rotl_i64: ; CHECK-LE: # %bb.0: -; CHECK-LE-NEXT: negu $1, $6 -; CHECK-LE-NEXT: andi $2, $1, 63 -; CHECK-LE-NEXT: srlv $7, $5, $2 -; CHECK-LE-NEXT: andi $1, $1, 32 -; CHECK-LE-NEXT: andi $3, $6, 63 -; CHECK-LE-NEXT: move $8, $7 -; CHECK-LE-NEXT: movn $8, $zero, $1 -; CHECK-LE-NEXT: sllv $9, $5, $3 -; CHECK-LE-NEXT: srl $10, $4, 1 -; CHECK-LE-NEXT: not $11, $3 -; CHECK-LE-NEXT: srlv $10, $10, $11 -; CHECK-LE-NEXT: or $9, $9, $10 -; CHECK-LE-NEXT: sllv $10, $4, $3 -; CHECK-LE-NEXT: andi $6, $6, 32 -; CHECK-LE-NEXT: movn $9, $10, $6 -; CHECK-LE-NEXT: or $3, $9, $8 -; CHECK-LE-NEXT: srlv $4, $4, $2 -; CHECK-LE-NEXT: not $2, $2 -; CHECK-LE-NEXT: sll $5, $5, 1 -; CHECK-LE-NEXT: sllv $2, $5, $2 -; CHECK-LE-NEXT: or $2, $2, $4 -; CHECK-LE-NEXT: movn $2, $7, $1 -; CHECK-LE-NEXT: movn $10, $zero, $6 +; CHECK-LE-NEXT: srl $1, $6, 5 +; CHECK-LE-NEXT: andi $1, $1, 1 +; CHECK-LE-NEXT: move $3, $4 +; CHECK-LE-NEXT: movn $3, $5, $1 +; CHECK-LE-NEXT: andi $7, $6, 31 +; CHECK-LE-NEXT: sllv $2, $3, $7 +; CHECK-LE-NEXT: movn $5, $4, $1 +; CHECK-LE-NEXT: srl $1, $5, 1 +; CHECK-LE-NEXT: not $4, $6 +; CHECK-LE-NEXT: andi $4, $4, 31 +; CHECK-LE-NEXT: srlv $1, $1, $4 +; CHECK-LE-NEXT: or $2, $2, $1 +; CHECK-LE-NEXT: sllv $1, $5, $7 +; CHECK-LE-NEXT: srl $3, $3, 1 +; CHECK-LE-NEXT: srlv $3, $3, $4 ; CHECK-LE-NEXT: jr $ra -; CHECK-LE-NEXT: or $2, $10, $2 +; CHECK-LE-NEXT: or $3, $1, $3 %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z) ret i64 %f } @@ -254,59 +238,43 @@ define i64 @rotr_i64(i64 %x, i64 %z) { ; CHECK-BE-LABEL: rotr_i64: ; CHECK-BE: # %bb.0: -; CHECK-BE-NEXT: negu $1, $7 -; CHECK-BE-NEXT: andi $2, $1, 63 -; CHECK-BE-NEXT: sllv $6, $5, $2 -; CHECK-BE-NEXT: andi $1, $1, 32 -; CHECK-BE-NEXT: andi $3, $7, 63 -; CHECK-BE-NEXT: move $8, $6 -; CHECK-BE-NEXT: movn $8, $zero, $1 -; CHECK-BE-NEXT: srlv $9, $5, $3 -; CHECK-BE-NEXT: sll $10, $4, 1 -; CHECK-BE-NEXT: not $11, $3 -; CHECK-BE-NEXT: sllv $10, $10, $11 -; CHECK-BE-NEXT: or $9, $10, $9 -; CHECK-BE-NEXT: srlv $10, $4, $3 -; CHECK-BE-NEXT: andi $7, $7, 32 -; CHECK-BE-NEXT: movn $9, $10, $7 -; CHECK-BE-NEXT: or $3, $9, $8 -; CHECK-BE-NEXT: sllv $4, $4, $2 -; CHECK-BE-NEXT: not $2, $2 -; CHECK-BE-NEXT: srl $5, $5, 1 -; CHECK-BE-NEXT: srlv $2, $5, $2 -; CHECK-BE-NEXT: or $2, $4, $2 -; CHECK-BE-NEXT: movn $2, $6, $1 -; CHECK-BE-NEXT: movn $10, $zero, $7 +; CHECK-BE-NEXT: srl $1, $7, 5 +; CHECK-BE-NEXT: andi $1, $1, 1 +; CHECK-BE-NEXT: move $3, $4 +; CHECK-BE-NEXT: movn $3, $5, $1 +; CHECK-BE-NEXT: andi $6, $7, 31 +; CHECK-BE-NEXT: srlv $2, $3, $6 +; CHECK-BE-NEXT: movn $5, $4, $1 +; CHECK-BE-NEXT: sll $1, $5, 1 +; CHECK-BE-NEXT: not $4, $7 +; CHECK-BE-NEXT: andi $4, $4, 31 +; CHECK-BE-NEXT: sllv $1, $1, $4 +; CHECK-BE-NEXT: or $2, $1, $2 +; CHECK-BE-NEXT: srlv $1, $5, $6 +; CHECK-BE-NEXT: sll $3, $3, 1 +; CHECK-BE-NEXT: sllv $3, $3, $4 ; CHECK-BE-NEXT: jr $ra -; CHECK-BE-NEXT: or $2, $10, $2 +; CHECK-BE-NEXT: or $3, $3, $1 ; ; CHECK-LE-LABEL: rotr_i64: ; CHECK-LE: # %bb.0: -; CHECK-LE-NEXT: negu $1, $6 -; CHECK-LE-NEXT: andi $3, $1, 63 -; CHECK-LE-NEXT: sllv $7, $4, $3 -; CHECK-LE-NEXT: andi $1, $1, 32 -; CHECK-LE-NEXT: andi $2, $6, 63 -; CHECK-LE-NEXT: move $8, $7 -; CHECK-LE-NEXT: movn $8, $zero, $1 -; CHECK-LE-NEXT: srlv $9, $4, $2 -; CHECK-LE-NEXT: sll $10, $5, 1 -; CHECK-LE-NEXT: not $11, $2 -; CHECK-LE-NEXT: sllv $10, $10, $11 -; CHECK-LE-NEXT: or $9, $10, $9 -; CHECK-LE-NEXT: srlv $10, $5, $2 -; CHECK-LE-NEXT: andi $6, $6, 32 -; CHECK-LE-NEXT: movn $9, $10, $6 -; CHECK-LE-NEXT: or $2, $9, $8 -; CHECK-LE-NEXT: sllv $5, $5, $3 -; CHECK-LE-NEXT: not $3, $3 -; CHECK-LE-NEXT: srl $4, $4, 1 -; CHECK-LE-NEXT: srlv $3, $4, $3 -; CHECK-LE-NEXT: or $3, $5, $3 -; CHECK-LE-NEXT: movn $3, $7, $1 -; CHECK-LE-NEXT: movn $10, $zero, $6 +; CHECK-LE-NEXT: srl $1, $6, 5 +; CHECK-LE-NEXT: andi $1, $1, 1 +; CHECK-LE-NEXT: move $3, $4 +; CHECK-LE-NEXT: movn $3, $5, $1 +; CHECK-LE-NEXT: andi $7, $6, 31 +; CHECK-LE-NEXT: srlv $2, $3, $7 +; CHECK-LE-NEXT: movn $5, $4, $1 +; CHECK-LE-NEXT: sll $1, $5, 1 +; CHECK-LE-NEXT: not $4, $6 +; CHECK-LE-NEXT: andi $4, $4, 31 +; CHECK-LE-NEXT: sllv $1, $1, $4 +; CHECK-LE-NEXT: or $2, $1, $2 +; CHECK-LE-NEXT: srlv $1, $5, $7 +; CHECK-LE-NEXT: sll $3, $3, 1 +; CHECK-LE-NEXT: sllv $3, $3, $4 ; CHECK-LE-NEXT: jr $ra -; CHECK-LE-NEXT: or $3, $10, $3 +; CHECK-LE-NEXT: or $3, $3, $1 %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z) ret i64 %f } diff --git a/llvm/test/CodeGen/Mips/funnel-shift.ll b/llvm/test/CodeGen/Mips/funnel-shift.ll --- a/llvm/test/CodeGen/Mips/funnel-shift.ll +++ b/llvm/test/CodeGen/Mips/funnel-shift.ll @@ -72,37 +72,25 @@ ; CHECK-BE-NEXT: jal __umoddi3 ; CHECK-BE-NEXT: addiu $7, $zero, 37 ; CHECK-BE-NEXT: not $1, $3 -; CHECK-BE-NEXT: andi $2, $3, 63 -; CHECK-BE-NEXT: not $4, $2 -; CHECK-BE-NEXT: srl $5, $18, 1 -; CHECK-BE-NEXT: sllv $6, $19, $2 -; CHECK-BE-NEXT: srlv $4, $5, $4 -; CHECK-BE-NEXT: andi $5, $1, 63 -; CHECK-BE-NEXT: srl $7, $16, 5 -; CHECK-BE-NEXT: sll $8, $17, 27 -; CHECK-BE-NEXT: or $7, $8, $7 -; CHECK-BE-NEXT: srl $8, $7, 1 -; CHECK-BE-NEXT: srlv $9, $8, $5 -; CHECK-BE-NEXT: andi $1, $1, 32 -; CHECK-BE-NEXT: move $10, $9 -; CHECK-BE-NEXT: movn $10, $zero, $1 -; CHECK-BE-NEXT: or $4, $6, $4 -; CHECK-BE-NEXT: sllv $6, $18, $2 -; CHECK-BE-NEXT: andi $3, $3, 32 -; CHECK-BE-NEXT: movn $4, $6, $3 -; CHECK-BE-NEXT: sll $7, $7, 31 -; CHECK-BE-NEXT: sll $2, $16, 27 -; CHECK-BE-NEXT: srl $11, $2, 1 -; CHECK-BE-NEXT: or $2, $4, $10 -; CHECK-BE-NEXT: movn $6, $zero, $3 -; CHECK-BE-NEXT: or $3, $11, $7 -; CHECK-BE-NEXT: srlv $3, $3, $5 -; CHECK-BE-NEXT: not $4, $5 -; CHECK-BE-NEXT: sll $5, $8, 1 -; CHECK-BE-NEXT: sllv $4, $5, $4 -; CHECK-BE-NEXT: or $3, $4, $3 -; CHECK-BE-NEXT: movn $3, $9, $1 -; CHECK-BE-NEXT: or $3, $6, $3 +; CHECK-BE-NEXT: srl $2, $3, 5 +; CHECK-BE-NEXT: andi $4, $2, 1 +; CHECK-BE-NEXT: movn $19, $18, $4 +; CHECK-BE-NEXT: andi $3, $3, 31 +; CHECK-BE-NEXT: sllv $2, $19, $3 +; CHECK-BE-NEXT: andi $1, $1, 31 +; CHECK-BE-NEXT: srl $5, $16, 5 +; CHECK-BE-NEXT: sll $6, $17, 27 +; CHECK-BE-NEXT: or $5, $6, $5 +; CHECK-BE-NEXT: movn $18, $5, $4 +; CHECK-BE-NEXT: srl $6, $18, 1 +; CHECK-BE-NEXT: srlv $6, $6, $1 +; CHECK-BE-NEXT: or $2, $2, $6 +; CHECK-BE-NEXT: sllv $3, $18, $3 +; CHECK-BE-NEXT: sll $6, $16, 27 +; CHECK-BE-NEXT: movn $5, $6, $4 +; CHECK-BE-NEXT: srl $4, $5, 1 +; CHECK-BE-NEXT: srlv $1, $4, $1 +; CHECK-BE-NEXT: or $3, $3, $1 ; CHECK-BE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload ; CHECK-BE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload ; CHECK-BE-NEXT: lw $18, 28($sp) # 4-byte Folded Reload @@ -134,38 +122,27 @@ ; CHECK-LE-NEXT: addiu $6, $zero, 37 ; CHECK-LE-NEXT: jal __umoddi3 ; CHECK-LE-NEXT: addiu $7, $zero, 0 -; CHECK-LE-NEXT: not $1, $2 -; CHECK-LE-NEXT: andi $3, $2, 63 -; CHECK-LE-NEXT: not $4, $3 -; CHECK-LE-NEXT: srl $5, $19, 1 -; CHECK-LE-NEXT: sllv $6, $18, $3 -; CHECK-LE-NEXT: srlv $4, $5, $4 -; CHECK-LE-NEXT: andi $5, $1, 63 -; CHECK-LE-NEXT: srl $7, $17, 5 -; CHECK-LE-NEXT: sll $8, $16, 27 -; CHECK-LE-NEXT: or $7, $8, $7 -; CHECK-LE-NEXT: srl $8, $7, 1 -; CHECK-LE-NEXT: srlv $9, $8, $5 -; CHECK-LE-NEXT: andi $1, $1, 32 -; CHECK-LE-NEXT: move $10, $9 -; CHECK-LE-NEXT: movn $10, $zero, $1 -; CHECK-LE-NEXT: or $4, $6, $4 -; CHECK-LE-NEXT: sllv $6, $19, $3 -; CHECK-LE-NEXT: andi $2, $2, 32 -; CHECK-LE-NEXT: movn $4, $6, $2 -; CHECK-LE-NEXT: sll $7, $7, 31 -; CHECK-LE-NEXT: sll $3, $17, 27 -; CHECK-LE-NEXT: srl $11, $3, 1 -; CHECK-LE-NEXT: or $3, $4, $10 -; CHECK-LE-NEXT: movn $6, $zero, $2 -; CHECK-LE-NEXT: or $2, $11, $7 -; CHECK-LE-NEXT: srlv $2, $2, $5 -; CHECK-LE-NEXT: not $4, $5 -; CHECK-LE-NEXT: sll $5, $8, 1 -; CHECK-LE-NEXT: sllv $4, $5, $4 -; CHECK-LE-NEXT: or $2, $4, $2 -; CHECK-LE-NEXT: movn $2, $9, $1 +; CHECK-LE-NEXT: srl $1, $2, 5 +; CHECK-LE-NEXT: andi $1, $1, 1 +; CHECK-LE-NEXT: srl $3, $17, 5 +; CHECK-LE-NEXT: sll $4, $16, 27 +; CHECK-LE-NEXT: or $3, $4, $3 +; CHECK-LE-NEXT: move $4, $19 +; CHECK-LE-NEXT: movn $4, $3, $1 +; CHECK-LE-NEXT: andi $5, $2, 31 +; CHECK-LE-NEXT: sllv $6, $4, $5 +; CHECK-LE-NEXT: not $2, $2 +; CHECK-LE-NEXT: andi $7, $2, 31 +; CHECK-LE-NEXT: sll $2, $17, 27 +; CHECK-LE-NEXT: movn $3, $2, $1 +; CHECK-LE-NEXT: srl $2, $3, 1 +; CHECK-LE-NEXT: srlv $2, $2, $7 ; CHECK-LE-NEXT: or $2, $6, $2 +; CHECK-LE-NEXT: movn $18, $19, $1 +; CHECK-LE-NEXT: sllv $1, $18, $5 +; CHECK-LE-NEXT: srl $3, $4, 1 +; CHECK-LE-NEXT: srlv $3, $3, $7 +; CHECK-LE-NEXT: or $3, $1, $3 ; CHECK-LE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload ; CHECK-LE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload ; CHECK-LE-NEXT: lw $18, 28($sp) # 4-byte Folded Reload @@ -250,15 +227,15 @@ ; CHECK-BE-NEXT: srl $1, $6, 23 ; CHECK-BE-NEXT: sll $2, $5, 9 ; CHECK-BE-NEXT: or $2, $2, $1 -; CHECK-BE-NEXT: sll $1, $6, 9 -; CHECK-BE-NEXT: srl $3, $7, 23 +; CHECK-BE-NEXT: srl $1, $7, 23 +; CHECK-BE-NEXT: sll $3, $6, 9 ; CHECK-BE-NEXT: jr $ra ; CHECK-BE-NEXT: or $3, $3, $1 ; ; CHECK-LE-LABEL: fshl_i64_const_overshift: ; CHECK-LE: # %bb.0: -; CHECK-LE-NEXT: sll $1, $7, 9 -; CHECK-LE-NEXT: srl $2, $6, 23 +; CHECK-LE-NEXT: srl $1, $6, 23 +; CHECK-LE-NEXT: sll $2, $7, 9 ; CHECK-LE-NEXT: or $2, $2, $1 ; CHECK-LE-NEXT: srl $1, $7, 23 ; CHECK-LE-NEXT: sll $3, $4, 9 @@ -338,40 +315,27 @@ ; CHECK-BE-NEXT: jal __umoddi3 ; CHECK-BE-NEXT: addiu $7, $zero, 37 ; CHECK-BE-NEXT: addiu $1, $3, 27 -; CHECK-BE-NEXT: andi $2, $1, 63 -; CHECK-BE-NEXT: not $3, $2 -; CHECK-BE-NEXT: srl $4, $16, 5 -; CHECK-BE-NEXT: sll $5, $17, 27 -; CHECK-BE-NEXT: or $4, $5, $4 -; CHECK-BE-NEXT: sll $5, $4, 1 -; CHECK-BE-NEXT: sll $6, $16, 27 -; CHECK-BE-NEXT: srlv $6, $6, $2 -; CHECK-BE-NEXT: sllv $3, $5, $3 -; CHECK-BE-NEXT: not $5, $1 -; CHECK-BE-NEXT: andi $7, $5, 63 -; CHECK-BE-NEXT: sll $8, $18, 1 -; CHECK-BE-NEXT: sllv $8, $8, $7 -; CHECK-BE-NEXT: andi $5, $5, 32 -; CHECK-BE-NEXT: move $9, $8 -; CHECK-BE-NEXT: movn $9, $zero, $5 -; CHECK-BE-NEXT: or $3, $3, $6 -; CHECK-BE-NEXT: srlv $2, $4, $2 -; CHECK-BE-NEXT: andi $1, $1, 32 -; CHECK-BE-NEXT: movn $3, $2, $1 -; CHECK-BE-NEXT: srl $4, $18, 31 -; CHECK-BE-NEXT: sll $6, $19, 1 -; CHECK-BE-NEXT: or $4, $6, $4 -; CHECK-BE-NEXT: or $3, $9, $3 -; CHECK-BE-NEXT: movn $2, $zero, $1 -; CHECK-BE-NEXT: sllv $1, $4, $7 -; CHECK-BE-NEXT: not $4, $7 -; CHECK-BE-NEXT: lui $6, 32767 -; CHECK-BE-NEXT: ori $6, $6, 65535 -; CHECK-BE-NEXT: and $6, $18, $6 -; CHECK-BE-NEXT: srlv $4, $6, $4 -; CHECK-BE-NEXT: or $1, $1, $4 -; CHECK-BE-NEXT: movn $1, $8, $5 -; CHECK-BE-NEXT: or $2, $1, $2 +; CHECK-BE-NEXT: andi $2, $1, 32 +; CHECK-BE-NEXT: srl $3, $2, 5 +; CHECK-BE-NEXT: srl $2, $16, 5 +; CHECK-BE-NEXT: sll $4, $17, 27 +; CHECK-BE-NEXT: or $4, $4, $2 +; CHECK-BE-NEXT: move $5, $4 +; CHECK-BE-NEXT: movn $5, $18, $3 +; CHECK-BE-NEXT: andi $6, $1, 31 +; CHECK-BE-NEXT: srlv $2, $5, $6 +; CHECK-BE-NEXT: not $1, $1 +; CHECK-BE-NEXT: andi $1, $1, 31 +; CHECK-BE-NEXT: movn $18, $19, $3 +; CHECK-BE-NEXT: sll $7, $18, 1 +; CHECK-BE-NEXT: sllv $7, $7, $1 +; CHECK-BE-NEXT: sll $8, $16, 27 +; CHECK-BE-NEXT: or $2, $7, $2 +; CHECK-BE-NEXT: movn $8, $4, $3 +; CHECK-BE-NEXT: srlv $3, $8, $6 +; CHECK-BE-NEXT: sll $4, $5, 1 +; CHECK-BE-NEXT: sllv $1, $4, $1 +; CHECK-BE-NEXT: or $3, $1, $3 ; CHECK-BE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload ; CHECK-BE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload ; CHECK-BE-NEXT: lw $18, 28($sp) # 4-byte Folded Reload @@ -404,39 +368,25 @@ ; CHECK-LE-NEXT: jal __umoddi3 ; CHECK-LE-NEXT: addiu $7, $zero, 0 ; CHECK-LE-NEXT: addiu $1, $2, 27 -; CHECK-LE-NEXT: andi $2, $1, 63 -; CHECK-LE-NEXT: not $3, $2 -; CHECK-LE-NEXT: srl $4, $17, 5 -; CHECK-LE-NEXT: sll $5, $16, 27 -; CHECK-LE-NEXT: or $4, $5, $4 -; CHECK-LE-NEXT: sll $5, $4, 1 -; CHECK-LE-NEXT: sll $6, $17, 27 -; CHECK-LE-NEXT: srlv $6, $6, $2 -; CHECK-LE-NEXT: sllv $3, $5, $3 -; CHECK-LE-NEXT: not $5, $1 -; CHECK-LE-NEXT: andi $7, $5, 63 -; CHECK-LE-NEXT: sll $8, $19, 1 -; CHECK-LE-NEXT: sllv $8, $8, $7 -; CHECK-LE-NEXT: andi $5, $5, 32 -; CHECK-LE-NEXT: move $9, $8 -; CHECK-LE-NEXT: movn $9, $zero, $5 -; CHECK-LE-NEXT: or $3, $3, $6 -; CHECK-LE-NEXT: srlv $4, $4, $2 -; CHECK-LE-NEXT: andi $1, $1, 32 -; CHECK-LE-NEXT: movn $3, $4, $1 -; CHECK-LE-NEXT: srl $2, $19, 31 -; CHECK-LE-NEXT: sll $6, $18, 1 -; CHECK-LE-NEXT: or $6, $6, $2 -; CHECK-LE-NEXT: or $2, $9, $3 -; CHECK-LE-NEXT: movn $4, $zero, $1 -; CHECK-LE-NEXT: sllv $1, $6, $7 -; CHECK-LE-NEXT: not $3, $7 -; CHECK-LE-NEXT: lui $6, 32767 -; CHECK-LE-NEXT: ori $6, $6, 65535 -; CHECK-LE-NEXT: and $6, $19, $6 -; CHECK-LE-NEXT: srlv $3, $6, $3 -; CHECK-LE-NEXT: or $1, $1, $3 -; CHECK-LE-NEXT: movn $1, $8, $5 +; CHECK-LE-NEXT: andi $2, $1, 32 +; CHECK-LE-NEXT: srl $3, $2, 5 +; CHECK-LE-NEXT: srl $2, $17, 5 +; CHECK-LE-NEXT: sll $4, $16, 27 +; CHECK-LE-NEXT: or $4, $4, $2 +; CHECK-LE-NEXT: sll $2, $17, 27 +; CHECK-LE-NEXT: movn $2, $4, $3 +; CHECK-LE-NEXT: andi $5, $1, 31 +; CHECK-LE-NEXT: srlv $2, $2, $5 +; CHECK-LE-NEXT: not $1, $1 +; CHECK-LE-NEXT: andi $1, $1, 31 +; CHECK-LE-NEXT: movn $4, $19, $3 +; CHECK-LE-NEXT: sll $6, $4, 1 +; CHECK-LE-NEXT: sllv $6, $6, $1 +; CHECK-LE-NEXT: or $2, $6, $2 +; CHECK-LE-NEXT: srlv $4, $4, $5 +; CHECK-LE-NEXT: movn $19, $18, $3 +; CHECK-LE-NEXT: sll $3, $19, 1 +; CHECK-LE-NEXT: sllv $1, $3, $1 ; CHECK-LE-NEXT: or $3, $1, $4 ; CHECK-LE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload ; CHECK-LE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll --- a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll +++ b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll @@ -87,52 +87,44 @@ define i64 @rotl_i64(i64 %x, i64 %z) { ; CHECK32_32-LABEL: rotl_i64: ; CHECK32_32: # %bb.0: -; CHECK32_32-NEXT: clrlwi 5, 6, 26 -; CHECK32_32-NEXT: subfic 8, 5, 32 -; CHECK32_32-NEXT: neg 6, 6 -; CHECK32_32-NEXT: slw 7, 3, 5 -; CHECK32_32-NEXT: addi 9, 5, -32 -; CHECK32_32-NEXT: srw 8, 4, 8 -; CHECK32_32-NEXT: clrlwi 6, 6, 26 -; CHECK32_32-NEXT: slw 9, 4, 9 -; CHECK32_32-NEXT: or 7, 7, 8 -; CHECK32_32-NEXT: subfic 8, 6, 32 -; CHECK32_32-NEXT: or 7, 7, 9 -; CHECK32_32-NEXT: addi 9, 6, -32 -; CHECK32_32-NEXT: slw 8, 3, 8 -; CHECK32_32-NEXT: srw 9, 3, 9 -; CHECK32_32-NEXT: srw 3, 3, 6 -; CHECK32_32-NEXT: srw 6, 4, 6 -; CHECK32_32-NEXT: or 6, 6, 8 -; CHECK32_32-NEXT: or 6, 6, 9 -; CHECK32_32-NEXT: slw 4, 4, 5 -; CHECK32_32-NEXT: or 3, 7, 3 -; CHECK32_32-NEXT: or 4, 4, 6 +; CHECK32_32-NEXT: andi. 5, 6, 32 +; CHECK32_32-NEXT: clrlwi 5, 6, 27 +; CHECK32_32-NEXT: subfic 6, 5, 32 +; CHECK32_32-NEXT: bc 12, 2, .LBB4_2 +; CHECK32_32-NEXT: # %bb.1: +; CHECK32_32-NEXT: ori 7, 3, 0 +; CHECK32_32-NEXT: ori 3, 4, 0 +; CHECK32_32-NEXT: b .LBB4_3 +; CHECK32_32-NEXT: .LBB4_2: +; CHECK32_32-NEXT: addi 7, 4, 0 +; CHECK32_32-NEXT: .LBB4_3: +; CHECK32_32-NEXT: srw 4, 7, 6 +; CHECK32_32-NEXT: slw 8, 3, 5 +; CHECK32_32-NEXT: srw 6, 3, 6 +; CHECK32_32-NEXT: slw 5, 7, 5 +; CHECK32_32-NEXT: or 3, 8, 4 +; CHECK32_32-NEXT: or 4, 5, 6 ; CHECK32_32-NEXT: blr ; ; CHECK32_64-LABEL: rotl_i64: ; CHECK32_64: # %bb.0: -; CHECK32_64-NEXT: clrlwi 5, 6, 26 -; CHECK32_64-NEXT: neg 6, 6 -; CHECK32_64-NEXT: subfic 8, 5, 32 -; CHECK32_64-NEXT: slw 7, 3, 5 -; CHECK32_64-NEXT: clrlwi 6, 6, 26 -; CHECK32_64-NEXT: srw 8, 4, 8 -; CHECK32_64-NEXT: addi 9, 5, -32 -; CHECK32_64-NEXT: or 7, 7, 8 -; CHECK32_64-NEXT: subfic 8, 6, 32 -; CHECK32_64-NEXT: slw 5, 4, 5 -; CHECK32_64-NEXT: slw 9, 4, 9 -; CHECK32_64-NEXT: srw 10, 3, 6 -; CHECK32_64-NEXT: srw 4, 4, 6 -; CHECK32_64-NEXT: addi 6, 6, -32 -; CHECK32_64-NEXT: slw 8, 3, 8 -; CHECK32_64-NEXT: srw 3, 3, 6 -; CHECK32_64-NEXT: or 4, 4, 8 -; CHECK32_64-NEXT: or 6, 7, 9 -; CHECK32_64-NEXT: or 4, 4, 3 -; CHECK32_64-NEXT: or 3, 6, 10 -; CHECK32_64-NEXT: or 4, 5, 4 +; CHECK32_64-NEXT: andi. 5, 6, 32 +; CHECK32_64-NEXT: clrlwi 5, 6, 27 +; CHECK32_64-NEXT: bc 12, 2, .LBB4_2 +; CHECK32_64-NEXT: # %bb.1: +; CHECK32_64-NEXT: ori 7, 3, 0 +; CHECK32_64-NEXT: ori 3, 4, 0 +; CHECK32_64-NEXT: b .LBB4_3 +; CHECK32_64-NEXT: .LBB4_2: +; CHECK32_64-NEXT: addi 7, 4, 0 +; CHECK32_64-NEXT: .LBB4_3: +; CHECK32_64-NEXT: subfic 6, 5, 32 +; CHECK32_64-NEXT: srw 4, 7, 6 +; CHECK32_64-NEXT: slw 8, 3, 5 +; CHECK32_64-NEXT: srw 6, 3, 6 +; CHECK32_64-NEXT: slw 5, 7, 5 +; CHECK32_64-NEXT: or 3, 8, 4 +; CHECK32_64-NEXT: or 4, 5, 6 ; CHECK32_64-NEXT: blr ; ; CHECK64-LABEL: rotl_i64: @@ -256,52 +248,44 @@ define i64 @rotr_i64(i64 %x, i64 %z) { ; CHECK32_32-LABEL: rotr_i64: ; CHECK32_32: # %bb.0: -; CHECK32_32-NEXT: clrlwi 5, 6, 26 -; CHECK32_32-NEXT: subfic 8, 5, 32 -; CHECK32_32-NEXT: neg 6, 6 -; CHECK32_32-NEXT: srw 7, 4, 5 -; CHECK32_32-NEXT: addi 9, 5, -32 -; CHECK32_32-NEXT: slw 8, 3, 8 -; CHECK32_32-NEXT: clrlwi 6, 6, 26 -; CHECK32_32-NEXT: srw 9, 3, 9 -; CHECK32_32-NEXT: or 7, 7, 8 -; CHECK32_32-NEXT: subfic 8, 6, 32 -; CHECK32_32-NEXT: or 7, 7, 9 -; CHECK32_32-NEXT: addi 9, 6, -32 -; CHECK32_32-NEXT: srw 8, 4, 8 -; CHECK32_32-NEXT: slw 9, 4, 9 -; CHECK32_32-NEXT: slw 4, 4, 6 -; CHECK32_32-NEXT: slw 6, 3, 6 -; CHECK32_32-NEXT: or 6, 6, 8 -; CHECK32_32-NEXT: or 6, 6, 9 -; CHECK32_32-NEXT: srw 3, 3, 5 -; CHECK32_32-NEXT: or 4, 7, 4 -; CHECK32_32-NEXT: or 3, 3, 6 +; CHECK32_32-NEXT: andi. 5, 6, 32 +; CHECK32_32-NEXT: clrlwi 5, 6, 27 +; CHECK32_32-NEXT: subfic 6, 5, 32 +; CHECK32_32-NEXT: bc 12, 2, .LBB11_2 +; CHECK32_32-NEXT: # %bb.1: +; CHECK32_32-NEXT: ori 7, 4, 0 +; CHECK32_32-NEXT: b .LBB11_3 +; CHECK32_32-NEXT: .LBB11_2: +; CHECK32_32-NEXT: addi 7, 3, 0 +; CHECK32_32-NEXT: addi 3, 4, 0 +; CHECK32_32-NEXT: .LBB11_3: +; CHECK32_32-NEXT: srw 4, 7, 5 +; CHECK32_32-NEXT: slw 8, 3, 6 +; CHECK32_32-NEXT: srw 5, 3, 5 +; CHECK32_32-NEXT: slw 6, 7, 6 +; CHECK32_32-NEXT: or 3, 8, 4 +; CHECK32_32-NEXT: or 4, 6, 5 ; CHECK32_32-NEXT: blr ; ; CHECK32_64-LABEL: rotr_i64: ; CHECK32_64: # %bb.0: -; CHECK32_64-NEXT: clrlwi 5, 6, 26 -; CHECK32_64-NEXT: neg 6, 6 -; CHECK32_64-NEXT: subfic 8, 5, 32 -; CHECK32_64-NEXT: srw 7, 4, 5 -; CHECK32_64-NEXT: clrlwi 6, 6, 26 -; CHECK32_64-NEXT: slw 8, 3, 8 -; CHECK32_64-NEXT: addi 9, 5, -32 -; CHECK32_64-NEXT: or 7, 7, 8 -; CHECK32_64-NEXT: subfic 8, 6, 32 +; CHECK32_64-NEXT: andi. 5, 6, 32 +; CHECK32_64-NEXT: clrlwi 5, 6, 27 +; CHECK32_64-NEXT: bc 12, 2, .LBB11_2 +; CHECK32_64-NEXT: # %bb.1: +; CHECK32_64-NEXT: ori 7, 4, 0 +; CHECK32_64-NEXT: b .LBB11_3 +; CHECK32_64-NEXT: .LBB11_2: +; CHECK32_64-NEXT: addi 7, 3, 0 +; CHECK32_64-NEXT: addi 3, 4, 0 +; CHECK32_64-NEXT: .LBB11_3: +; CHECK32_64-NEXT: subfic 6, 5, 32 +; CHECK32_64-NEXT: srw 4, 7, 5 +; CHECK32_64-NEXT: slw 8, 3, 6 ; CHECK32_64-NEXT: srw 5, 3, 5 -; CHECK32_64-NEXT: srw 9, 3, 9 -; CHECK32_64-NEXT: slw 10, 4, 6 -; CHECK32_64-NEXT: slw 3, 3, 6 -; CHECK32_64-NEXT: addi 6, 6, -32 -; CHECK32_64-NEXT: srw 8, 4, 8 -; CHECK32_64-NEXT: slw 4, 4, 6 -; CHECK32_64-NEXT: or 3, 3, 8 -; CHECK32_64-NEXT: or 6, 7, 9 -; CHECK32_64-NEXT: or 3, 3, 4 -; CHECK32_64-NEXT: or 4, 6, 10 -; CHECK32_64-NEXT: or 3, 5, 3 +; CHECK32_64-NEXT: slw 6, 7, 6 +; CHECK32_64-NEXT: or 3, 8, 4 +; CHECK32_64-NEXT: or 4, 6, 5 ; CHECK32_64-NEXT: blr ; ; CHECK64-LABEL: rotr_i64: diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift.ll b/llvm/test/CodeGen/PowerPC/funnel-shift.ll --- a/llvm/test/CodeGen/PowerPC/funnel-shift.ll +++ b/llvm/test/CodeGen/PowerPC/funnel-shift.ll @@ -42,58 +42,47 @@ define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) { ; CHECK32_32-LABEL: fshl_i64: ; CHECK32_32: # %bb.0: -; CHECK32_32-NEXT: clrlwi 7, 8, 26 -; CHECK32_32-NEXT: not 8, 8 -; CHECK32_32-NEXT: rotlwi 6, 6, 31 -; CHECK32_32-NEXT: subfic 10, 7, 32 -; CHECK32_32-NEXT: srwi 9, 5, 1 +; CHECK32_32-NEXT: andi. 7, 8, 32 +; CHECK32_32-NEXT: clrlwi 7, 8, 27 +; CHECK32_32-NEXT: subfic 8, 7, 32 +; CHECK32_32-NEXT: bc 12, 2, .LBB1_2 +; CHECK32_32-NEXT: # %bb.1: +; CHECK32_32-NEXT: ori 9, 5, 0 +; CHECK32_32-NEXT: ori 3, 4, 0 +; CHECK32_32-NEXT: ori 4, 6, 0 +; CHECK32_32-NEXT: b .LBB1_3 +; CHECK32_32-NEXT: .LBB1_2: +; CHECK32_32-NEXT: addi 9, 4, 0 +; CHECK32_32-NEXT: addi 4, 5, 0 +; CHECK32_32-NEXT: .LBB1_3: +; CHECK32_32-NEXT: srw 5, 9, 8 ; CHECK32_32-NEXT: slw 3, 3, 7 -; CHECK32_32-NEXT: clrlwi 8, 8, 26 -; CHECK32_32-NEXT: rlwimi 6, 5, 31, 0, 0 -; CHECK32_32-NEXT: srw 5, 4, 10 -; CHECK32_32-NEXT: srw 10, 9, 8 -; CHECK32_32-NEXT: srw 6, 6, 8 +; CHECK32_32-NEXT: srw 4, 4, 8 +; CHECK32_32-NEXT: slw 6, 9, 7 ; CHECK32_32-NEXT: or 3, 3, 5 -; CHECK32_32-NEXT: subfic 5, 8, 32 -; CHECK32_32-NEXT: addi 8, 8, -32 -; CHECK32_32-NEXT: slw 5, 9, 5 -; CHECK32_32-NEXT: srw 8, 9, 8 -; CHECK32_32-NEXT: addi 9, 7, -32 -; CHECK32_32-NEXT: slw 9, 4, 9 -; CHECK32_32-NEXT: or 5, 6, 5 -; CHECK32_32-NEXT: or 3, 3, 9 -; CHECK32_32-NEXT: or 5, 5, 8 -; CHECK32_32-NEXT: slw 4, 4, 7 -; CHECK32_32-NEXT: or 3, 3, 10 -; CHECK32_32-NEXT: or 4, 4, 5 +; CHECK32_32-NEXT: or 4, 6, 4 ; CHECK32_32-NEXT: blr ; ; CHECK32_64-LABEL: fshl_i64: ; CHECK32_64: # %bb.0: -; CHECK32_64-NEXT: clrlwi 7, 8, 26 -; CHECK32_64-NEXT: not 8, 8 -; CHECK32_64-NEXT: subfic 9, 7, 32 -; CHECK32_64-NEXT: rotlwi 6, 6, 31 +; CHECK32_64-NEXT: andi. 7, 8, 32 +; CHECK32_64-NEXT: clrlwi 7, 8, 27 +; CHECK32_64-NEXT: bc 12, 2, .LBB1_2 +; CHECK32_64-NEXT: # %bb.1: +; CHECK32_64-NEXT: ori 9, 5, 0 +; CHECK32_64-NEXT: ori 3, 4, 0 +; CHECK32_64-NEXT: ori 5, 6, 0 +; CHECK32_64-NEXT: b .LBB1_3 +; CHECK32_64-NEXT: .LBB1_2: +; CHECK32_64-NEXT: addi 9, 4, 0 +; CHECK32_64-NEXT: .LBB1_3: +; CHECK32_64-NEXT: subfic 8, 7, 32 +; CHECK32_64-NEXT: srw 4, 9, 8 ; CHECK32_64-NEXT: slw 3, 3, 7 -; CHECK32_64-NEXT: clrlwi 8, 8, 26 -; CHECK32_64-NEXT: srw 9, 4, 9 -; CHECK32_64-NEXT: rlwimi 6, 5, 31, 0, 0 -; CHECK32_64-NEXT: srwi 5, 5, 1 -; CHECK32_64-NEXT: addi 10, 7, -32 -; CHECK32_64-NEXT: or 3, 3, 9 -; CHECK32_64-NEXT: subfic 9, 8, 32 -; CHECK32_64-NEXT: slw 7, 4, 7 -; CHECK32_64-NEXT: slw 4, 4, 10 -; CHECK32_64-NEXT: srw 10, 5, 8 -; CHECK32_64-NEXT: srw 6, 6, 8 -; CHECK32_64-NEXT: addi 8, 8, -32 -; CHECK32_64-NEXT: slw 9, 5, 9 ; CHECK32_64-NEXT: srw 5, 5, 8 -; CHECK32_64-NEXT: or 6, 6, 9 +; CHECK32_64-NEXT: slw 6, 9, 7 ; CHECK32_64-NEXT: or 3, 3, 4 ; CHECK32_64-NEXT: or 4, 6, 5 -; CHECK32_64-NEXT: or 3, 3, 10 -; CHECK32_64-NEXT: or 4, 7, 4 ; CHECK32_64-NEXT: blr ; ; CHECK64-LABEL: fshl_i64: @@ -135,31 +124,29 @@ ; CHECK32_32-NEXT: li 5, 0 ; CHECK32_32-NEXT: li 6, 37 ; CHECK32_32-NEXT: bl __umoddi3 -; CHECK32_32-NEXT: clrlwi 6, 4, 26 -; CHECK32_32-NEXT: srwi 3, 30, 6 -; CHECK32_32-NEXT: not 4, 4 -; CHECK32_32-NEXT: subfic 8, 6, 32 -; CHECK32_32-NEXT: slwi 5, 30, 26 -; CHECK32_32-NEXT: rlwimi 3, 29, 26, 1, 5 -; CHECK32_32-NEXT: slw 7, 27, 6 -; CHECK32_32-NEXT: clrlwi 4, 4, 26 -; CHECK32_32-NEXT: srw 8, 28, 8 -; CHECK32_32-NEXT: srw 9, 3, 4 -; CHECK32_32-NEXT: srw 5, 5, 4 -; CHECK32_32-NEXT: or 7, 7, 8 -; CHECK32_32-NEXT: subfic 8, 4, 32 -; CHECK32_32-NEXT: addi 4, 4, -32 -; CHECK32_32-NEXT: slw 8, 3, 8 -; CHECK32_32-NEXT: srw 4, 3, 4 -; CHECK32_32-NEXT: addi 3, 6, -32 -; CHECK32_32-NEXT: slw 3, 28, 3 -; CHECK32_32-NEXT: or 5, 5, 8 -; CHECK32_32-NEXT: or 3, 7, 3 -; CHECK32_32-NEXT: or 4, 5, 4 -; CHECK32_32-NEXT: slw 5, 28, 6 -; CHECK32_32-NEXT: or 3, 3, 9 -; CHECK32_32-NEXT: or 4, 5, 4 +; CHECK32_32-NEXT: rotlwi 3, 30, 27 +; CHECK32_32-NEXT: slwi 5, 30, 27 +; CHECK32_32-NEXT: andi. 6, 4, 32 +; CHECK32_32-NEXT: rlwimi 3, 29, 27, 0, 4 +; CHECK32_32-NEXT: clrlwi 4, 4, 27 +; CHECK32_32-NEXT: subfic 6, 4, 32 +; CHECK32_32-NEXT: bc 12, 2, .LBB2_2 +; CHECK32_32-NEXT: # %bb.1: +; CHECK32_32-NEXT: ori 7, 3, 0 +; CHECK32_32-NEXT: ori 8, 28, 0 +; CHECK32_32-NEXT: ori 3, 5, 0 +; CHECK32_32-NEXT: b .LBB2_3 +; CHECK32_32-NEXT: .LBB2_2: +; CHECK32_32-NEXT: addi 7, 28, 0 +; CHECK32_32-NEXT: addi 8, 27, 0 +; CHECK32_32-NEXT: .LBB2_3: ; CHECK32_32-NEXT: lwz 30, 24(1) # 4-byte Folded Reload +; CHECK32_32-NEXT: srw 5, 7, 6 +; CHECK32_32-NEXT: slw 8, 8, 4 +; CHECK32_32-NEXT: srw 6, 3, 6 +; CHECK32_32-NEXT: slw 4, 7, 4 +; CHECK32_32-NEXT: or 3, 8, 5 +; CHECK32_32-NEXT: or 4, 4, 6 ; CHECK32_32-NEXT: lwz 29, 20(1) # 4-byte Folded Reload ; CHECK32_32-NEXT: lwz 28, 16(1) # 4-byte Folded Reload ; CHECK32_32-NEXT: lwz 27, 12(1) # 4-byte Folded Reload @@ -192,35 +179,42 @@ ; CHECK32_64-NEXT: mr 30, 6 ; CHECK32_64-NEXT: li 6, 37 ; CHECK32_64-NEXT: bl __umoddi3 -; CHECK32_64-NEXT: clrlwi 6, 4, 26 -; CHECK32_64-NEXT: not 4, 4 -; CHECK32_64-NEXT: subfic 8, 6, 32 -; CHECK32_64-NEXT: srwi 3, 30, 6 -; CHECK32_64-NEXT: slw 7, 27, 6 -; CHECK32_64-NEXT: clrlwi 4, 4, 26 +; CHECK32_64-NEXT: rotlwi 3, 30, 27 +; CHECK32_64-NEXT: andi. 5, 4, 32 +; CHECK32_64-NEXT: bc 12, 2, .LBB2_2 +; CHECK32_64-NEXT: # %bb.1: +; CHECK32_64-NEXT: ori 8, 28, 0 +; CHECK32_64-NEXT: b .LBB2_3 +; CHECK32_64-NEXT: .LBB2_2: +; CHECK32_64-NEXT: addi 8, 27, 0 +; CHECK32_64-NEXT: .LBB2_3: ; CHECK32_64-NEXT: lwz 27, 12(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: srw 8, 28, 8 -; CHECK32_64-NEXT: rlwimi 3, 29, 26, 1, 5 -; CHECK32_64-NEXT: lwz 29, 20(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: slwi 5, 30, 26 -; CHECK32_64-NEXT: or 7, 7, 8 -; CHECK32_64-NEXT: subfic 8, 4, 32 +; CHECK32_64-NEXT: rlwimi 3, 29, 27, 0, 4 +; CHECK32_64-NEXT: clrlwi 4, 4, 27 +; CHECK32_64-NEXT: bc 12, 2, .LBB2_5 +; CHECK32_64-NEXT: # %bb.4: +; CHECK32_64-NEXT: ori 7, 3, 0 +; CHECK32_64-NEXT: b .LBB2_6 +; CHECK32_64-NEXT: .LBB2_5: +; CHECK32_64-NEXT: addi 7, 28, 0 +; CHECK32_64-NEXT: .LBB2_6: +; CHECK32_64-NEXT: slwi 5, 30, 27 ; CHECK32_64-NEXT: lwz 30, 24(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: addi 9, 6, -32 -; CHECK32_64-NEXT: srw 10, 3, 4 -; CHECK32_64-NEXT: srw 5, 5, 4 -; CHECK32_64-NEXT: addi 4, 4, -32 -; CHECK32_64-NEXT: slw 8, 3, 8 -; CHECK32_64-NEXT: slw 9, 28, 9 -; CHECK32_64-NEXT: srw 3, 3, 4 -; CHECK32_64-NEXT: or 4, 5, 8 -; CHECK32_64-NEXT: slw 6, 28, 6 -; CHECK32_64-NEXT: or 5, 7, 9 +; CHECK32_64-NEXT: bc 12, 2, .LBB2_8 +; CHECK32_64-NEXT: # %bb.7: +; CHECK32_64-NEXT: ori 3, 5, 0 +; CHECK32_64-NEXT: b .LBB2_8 +; CHECK32_64-NEXT: .LBB2_8: +; CHECK32_64-NEXT: subfic 6, 4, 32 +; CHECK32_64-NEXT: slw 8, 8, 4 +; CHECK32_64-NEXT: lwz 29, 20(1) # 4-byte Folded Reload +; CHECK32_64-NEXT: srw 9, 7, 6 +; CHECK32_64-NEXT: srw 5, 3, 6 +; CHECK32_64-NEXT: slw 4, 7, 4 +; CHECK32_64-NEXT: or 3, 8, 9 ; CHECK32_64-NEXT: lwz 28, 16(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: or 4, 4, 3 -; CHECK32_64-NEXT: or 3, 5, 10 +; CHECK32_64-NEXT: or 4, 4, 5 ; CHECK32_64-NEXT: lwz 0, 36(1) -; CHECK32_64-NEXT: or 4, 6, 4 ; CHECK32_64-NEXT: addi 1, 1, 32 ; CHECK32_64-NEXT: mtlr 0 ; CHECK32_64-NEXT: blr @@ -347,58 +341,47 @@ define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) { ; CHECK32_32-LABEL: fshr_i64: ; CHECK32_32: # %bb.0: -; CHECK32_32-NEXT: clrlwi 7, 8, 26 -; CHECK32_32-NEXT: slwi 9, 4, 1 -; CHECK32_32-NEXT: not 8, 8 -; CHECK32_32-NEXT: rotlwi 4, 4, 1 -; CHECK32_32-NEXT: subfic 10, 7, 32 -; CHECK32_32-NEXT: srw 6, 6, 7 -; CHECK32_32-NEXT: clrlwi 8, 8, 26 -; CHECK32_32-NEXT: rlwimi 4, 3, 1, 0, 30 -; CHECK32_32-NEXT: slw 3, 5, 10 -; CHECK32_32-NEXT: slw 10, 9, 8 -; CHECK32_32-NEXT: slw 4, 4, 8 -; CHECK32_32-NEXT: or 3, 6, 3 -; CHECK32_32-NEXT: subfic 6, 8, 32 -; CHECK32_32-NEXT: addi 8, 8, -32 -; CHECK32_32-NEXT: srw 6, 9, 6 -; CHECK32_32-NEXT: slw 8, 9, 8 -; CHECK32_32-NEXT: addi 9, 7, -32 -; CHECK32_32-NEXT: srw 9, 5, 9 -; CHECK32_32-NEXT: or 3, 3, 9 -; CHECK32_32-NEXT: or 6, 4, 6 -; CHECK32_32-NEXT: or 4, 10, 3 -; CHECK32_32-NEXT: or 3, 6, 8 -; CHECK32_32-NEXT: srw 5, 5, 7 +; CHECK32_32-NEXT: andi. 7, 8, 32 +; CHECK32_32-NEXT: clrlwi 7, 8, 27 +; CHECK32_32-NEXT: subfic 8, 7, 32 +; CHECK32_32-NEXT: bc 12, 2, .LBB9_2 +; CHECK32_32-NEXT: # %bb.1: +; CHECK32_32-NEXT: ori 9, 4, 0 +; CHECK32_32-NEXT: ori 4, 5, 0 +; CHECK32_32-NEXT: b .LBB9_3 +; CHECK32_32-NEXT: .LBB9_2: +; CHECK32_32-NEXT: addi 9, 5, 0 +; CHECK32_32-NEXT: addi 3, 4, 0 +; CHECK32_32-NEXT: addi 4, 6, 0 +; CHECK32_32-NEXT: .LBB9_3: +; CHECK32_32-NEXT: srw 5, 9, 7 +; CHECK32_32-NEXT: slw 3, 3, 8 +; CHECK32_32-NEXT: srw 4, 4, 7 +; CHECK32_32-NEXT: slw 6, 9, 8 ; CHECK32_32-NEXT: or 3, 3, 5 +; CHECK32_32-NEXT: or 4, 6, 4 ; CHECK32_32-NEXT: blr ; ; CHECK32_64-LABEL: fshr_i64: ; CHECK32_64: # %bb.0: -; CHECK32_64-NEXT: rotlwi 7, 4, 1 -; CHECK32_64-NEXT: slwi 4, 4, 1 -; CHECK32_64-NEXT: rlwimi 7, 3, 1, 0, 30 -; CHECK32_64-NEXT: clrlwi 3, 8, 26 -; CHECK32_64-NEXT: not 8, 8 -; CHECK32_64-NEXT: subfic 9, 3, 32 -; CHECK32_64-NEXT: srw 6, 6, 3 -; CHECK32_64-NEXT: clrlwi 8, 8, 26 -; CHECK32_64-NEXT: slw 9, 5, 9 -; CHECK32_64-NEXT: addi 10, 3, -32 -; CHECK32_64-NEXT: or 6, 6, 9 -; CHECK32_64-NEXT: subfic 9, 8, 32 -; CHECK32_64-NEXT: srw 3, 5, 3 -; CHECK32_64-NEXT: srw 5, 5, 10 -; CHECK32_64-NEXT: slw 10, 4, 8 -; CHECK32_64-NEXT: slw 7, 7, 8 -; CHECK32_64-NEXT: addi 8, 8, -32 -; CHECK32_64-NEXT: srw 9, 4, 9 -; CHECK32_64-NEXT: slw 4, 4, 8 -; CHECK32_64-NEXT: or 7, 7, 9 -; CHECK32_64-NEXT: or 5, 6, 5 -; CHECK32_64-NEXT: or 6, 7, 4 -; CHECK32_64-NEXT: or 4, 10, 5 -; CHECK32_64-NEXT: or 3, 6, 3 +; CHECK32_64-NEXT: andi. 7, 8, 32 +; CHECK32_64-NEXT: clrlwi 7, 8, 27 +; CHECK32_64-NEXT: bc 12, 2, .LBB9_2 +; CHECK32_64-NEXT: # %bb.1: +; CHECK32_64-NEXT: ori 9, 4, 0 +; CHECK32_64-NEXT: b .LBB9_3 +; CHECK32_64-NEXT: .LBB9_2: +; CHECK32_64-NEXT: addi 9, 5, 0 +; CHECK32_64-NEXT: addi 3, 4, 0 +; CHECK32_64-NEXT: addi 5, 6, 0 +; CHECK32_64-NEXT: .LBB9_3: +; CHECK32_64-NEXT: subfic 8, 7, 32 +; CHECK32_64-NEXT: srw 4, 9, 7 +; CHECK32_64-NEXT: slw 3, 3, 8 +; CHECK32_64-NEXT: srw 5, 5, 7 +; CHECK32_64-NEXT: slw 6, 9, 8 +; CHECK32_64-NEXT: or 3, 3, 4 +; CHECK32_64-NEXT: or 4, 6, 5 ; CHECK32_64-NEXT: blr ; ; CHECK64-LABEL: fshr_i64: @@ -440,35 +423,30 @@ ; CHECK32_32-NEXT: li 5, 0 ; CHECK32_32-NEXT: li 6, 37 ; CHECK32_32-NEXT: bl __umoddi3 +; CHECK32_32-NEXT: rotlwi 3, 30, 27 ; CHECK32_32-NEXT: addi 4, 4, 27 -; CHECK32_32-NEXT: rotlwi 5, 30, 27 -; CHECK32_32-NEXT: clrlwi 8, 4, 26 -; CHECK32_32-NEXT: slwi 3, 30, 27 -; CHECK32_32-NEXT: rotlwi 7, 28, 1 -; CHECK32_32-NEXT: rlwimi 5, 29, 27, 0, 4 -; CHECK32_32-NEXT: not 4, 4 -; CHECK32_32-NEXT: subfic 9, 8, 32 -; CHECK32_32-NEXT: slwi 6, 28, 1 -; CHECK32_32-NEXT: rlwimi 7, 27, 1, 0, 30 -; CHECK32_32-NEXT: srw 3, 3, 8 -; CHECK32_32-NEXT: clrlwi 4, 4, 26 -; CHECK32_32-NEXT: slw 9, 5, 9 -; CHECK32_32-NEXT: slw 10, 6, 4 -; CHECK32_32-NEXT: slw 7, 7, 4 -; CHECK32_32-NEXT: or 3, 3, 9 -; CHECK32_32-NEXT: subfic 9, 4, 32 -; CHECK32_32-NEXT: addi 4, 4, -32 -; CHECK32_32-NEXT: srw 9, 6, 9 -; CHECK32_32-NEXT: slw 6, 6, 4 -; CHECK32_32-NEXT: addi 4, 8, -32 -; CHECK32_32-NEXT: srw 4, 5, 4 -; CHECK32_32-NEXT: or 3, 3, 4 -; CHECK32_32-NEXT: or 7, 7, 9 -; CHECK32_32-NEXT: or 4, 10, 3 -; CHECK32_32-NEXT: or 3, 7, 6 -; CHECK32_32-NEXT: srw 5, 5, 8 -; CHECK32_32-NEXT: or 3, 3, 5 +; CHECK32_32-NEXT: slwi 5, 30, 27 +; CHECK32_32-NEXT: rlwimi 3, 29, 27, 0, 4 +; CHECK32_32-NEXT: andi. 6, 4, 32 +; CHECK32_32-NEXT: clrlwi 4, 4, 27 +; CHECK32_32-NEXT: subfic 6, 4, 32 +; CHECK32_32-NEXT: bc 12, 2, .LBB10_2 +; CHECK32_32-NEXT: # %bb.1: +; CHECK32_32-NEXT: ori 7, 28, 0 +; CHECK32_32-NEXT: ori 8, 27, 0 +; CHECK32_32-NEXT: b .LBB10_3 +; CHECK32_32-NEXT: .LBB10_2: +; CHECK32_32-NEXT: addi 7, 3, 0 +; CHECK32_32-NEXT: addi 8, 28, 0 +; CHECK32_32-NEXT: addi 3, 5, 0 +; CHECK32_32-NEXT: .LBB10_3: ; CHECK32_32-NEXT: lwz 30, 24(1) # 4-byte Folded Reload +; CHECK32_32-NEXT: srw 5, 7, 4 +; CHECK32_32-NEXT: slw 8, 8, 6 +; CHECK32_32-NEXT: srw 4, 3, 4 +; CHECK32_32-NEXT: slw 6, 7, 6 +; CHECK32_32-NEXT: or 3, 8, 5 +; CHECK32_32-NEXT: or 4, 6, 4 ; CHECK32_32-NEXT: lwz 29, 20(1) # 4-byte Folded Reload ; CHECK32_32-NEXT: lwz 28, 16(1) # 4-byte Folded Reload ; CHECK32_32-NEXT: lwz 27, 12(1) # 4-byte Folded Reload @@ -503,37 +481,36 @@ ; CHECK32_64-NEXT: bl __umoddi3 ; CHECK32_64-NEXT: addi 4, 4, 27 ; CHECK32_64-NEXT: rotlwi 3, 30, 27 -; CHECK32_64-NEXT: clrlwi 8, 4, 26 +; CHECK32_64-NEXT: andi. 5, 4, 32 ; CHECK32_64-NEXT: rlwimi 3, 29, 27, 0, 4 ; CHECK32_64-NEXT: lwz 29, 20(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: slwi 6, 30, 27 -; CHECK32_64-NEXT: lwz 30, 24(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: not 4, 4 -; CHECK32_64-NEXT: subfic 9, 8, 32 -; CHECK32_64-NEXT: rotlwi 5, 28, 1 -; CHECK32_64-NEXT: srw 6, 6, 8 -; CHECK32_64-NEXT: clrlwi 4, 4, 26 -; CHECK32_64-NEXT: slw 9, 3, 9 -; CHECK32_64-NEXT: rlwimi 5, 27, 1, 0, 30 -; CHECK32_64-NEXT: slwi 7, 28, 1 +; CHECK32_64-NEXT: bc 12, 2, .LBB10_2 +; CHECK32_64-NEXT: # %bb.1: +; CHECK32_64-NEXT: ori 7, 28, 0 +; CHECK32_64-NEXT: ori 8, 27, 0 +; CHECK32_64-NEXT: b .LBB10_3 +; CHECK32_64-NEXT: .LBB10_2: +; CHECK32_64-NEXT: addi 7, 3, 0 +; CHECK32_64-NEXT: addi 8, 28, 0 +; CHECK32_64-NEXT: .LBB10_3: +; CHECK32_64-NEXT: clrlwi 4, 4, 27 ; CHECK32_64-NEXT: lwz 28, 16(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: addi 10, 8, -32 +; CHECK32_64-NEXT: slwi 5, 30, 27 +; CHECK32_64-NEXT: subfic 6, 4, 32 +; CHECK32_64-NEXT: bc 12, 2, .LBB10_4 +; CHECK32_64-NEXT: b .LBB10_5 +; CHECK32_64-NEXT: .LBB10_4: +; CHECK32_64-NEXT: addi 3, 5, 0 +; CHECK32_64-NEXT: .LBB10_5: +; CHECK32_64-NEXT: srw 9, 7, 4 +; CHECK32_64-NEXT: slw 8, 8, 6 +; CHECK32_64-NEXT: lwz 30, 24(1) # 4-byte Folded Reload +; CHECK32_64-NEXT: srw 4, 3, 4 +; CHECK32_64-NEXT: slw 5, 7, 6 ; CHECK32_64-NEXT: lwz 27, 12(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: or 6, 6, 9 -; CHECK32_64-NEXT: subfic 9, 4, 32 -; CHECK32_64-NEXT: srw 8, 3, 8 -; CHECK32_64-NEXT: srw 3, 3, 10 +; CHECK32_64-NEXT: or 3, 8, 9 +; CHECK32_64-NEXT: or 4, 5, 4 ; CHECK32_64-NEXT: lwz 0, 36(1) -; CHECK32_64-NEXT: slw 10, 7, 4 -; CHECK32_64-NEXT: slw 5, 5, 4 -; CHECK32_64-NEXT: addi 4, 4, -32 -; CHECK32_64-NEXT: srw 9, 7, 9 -; CHECK32_64-NEXT: slw 4, 7, 4 -; CHECK32_64-NEXT: or 5, 5, 9 -; CHECK32_64-NEXT: or 3, 6, 3 -; CHECK32_64-NEXT: or 5, 5, 4 -; CHECK32_64-NEXT: or 4, 10, 3 -; CHECK32_64-NEXT: or 3, 5, 8 ; CHECK32_64-NEXT: addi 1, 1, 32 ; CHECK32_64-NEXT: mtlr 0 ; CHECK32_64-NEXT: blr diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll --- a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll @@ -218,159 +218,86 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: rol_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a7, a1 -; RV32I-NEXT: andi a1, a2, 63 -; RV32I-NEXT: addi t0, a1, -32 -; RV32I-NEXT: addi a6, zero, 31 -; RV32I-NEXT: bltz t0, .LBB7_2 +; RV32I-NEXT: srli a3, a2, 5 +; RV32I-NEXT: andi a3, a3, 1 +; RV32I-NEXT: mv a4, a1 +; RV32I-NEXT: bnez a3, .LBB7_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll a1, a0, t0 -; RV32I-NEXT: j .LBB7_3 +; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: sll a3, a7, a2 -; RV32I-NEXT: sub a1, a6, a1 -; RV32I-NEXT: srli a4, a0, 1 -; RV32I-NEXT: srl a1, a4, a1 -; RV32I-NEXT: or a1, a3, a1 -; RV32I-NEXT: .LBB7_3: -; RV32I-NEXT: neg a5, a2 -; RV32I-NEXT: andi a3, a5, 63 -; RV32I-NEXT: addi a4, a3, -32 -; RV32I-NEXT: bltz a4, .LBB7_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a3, a7, a4 -; RV32I-NEXT: bltz t0, .LBB7_6 -; RV32I-NEXT: j .LBB7_7 -; RV32I-NEXT: .LBB7_5: -; RV32I-NEXT: srl a4, a7, a5 -; RV32I-NEXT: or a1, a1, a4 -; RV32I-NEXT: srl a4, a0, a5 -; RV32I-NEXT: sub a3, a6, a3 -; RV32I-NEXT: slli a5, a7, 1 -; RV32I-NEXT: sll a3, a5, a3 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: bgez t0, .LBB7_7 -; RV32I-NEXT: .LBB7_6: +; RV32I-NEXT: sll a6, a4, a2 +; RV32I-NEXT: bnez a3, .LBB7_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: .LBB7_4: +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: not a5, a2 +; RV32I-NEXT: srl a1, a1, a5 +; RV32I-NEXT: or a3, a6, a1 ; RV32I-NEXT: sll a0, a0, a2 -; RV32I-NEXT: or a3, a3, a0 -; RV32I-NEXT: .LBB7_7: +; RV32I-NEXT: srli a1, a4, 1 +; RV32I-NEXT: srl a1, a1, a5 +; RV32I-NEXT: or a1, a0, a1 ; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: ret ; ; RV32IB-LABEL: rol_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: sll a7, a1, a2 -; RV32IB-NEXT: andi a4, a2, 63 -; RV32IB-NEXT: addi a6, zero, 31 -; RV32IB-NEXT: sub a5, a6, a4 -; RV32IB-NEXT: srli a3, a0, 1 -; RV32IB-NEXT: srl a3, a3, a5 -; RV32IB-NEXT: or a7, a7, a3 -; RV32IB-NEXT: addi t1, a4, -32 -; RV32IB-NEXT: sll a5, a0, t1 -; RV32IB-NEXT: slti a3, t1, 0 -; RV32IB-NEXT: cmov a7, a3, a7, a5 -; RV32IB-NEXT: neg a5, a2 -; RV32IB-NEXT: srl t0, a1, a5 -; RV32IB-NEXT: andi t2, a5, 63 -; RV32IB-NEXT: addi a4, t2, -32 -; RV32IB-NEXT: srai a3, a4, 31 -; RV32IB-NEXT: and a3, a3, t0 -; RV32IB-NEXT: or a7, a7, a3 -; RV32IB-NEXT: srl t0, a0, a5 -; RV32IB-NEXT: sub a5, a6, t2 -; RV32IB-NEXT: slli a3, a1, 1 -; RV32IB-NEXT: sll a3, a3, a5 -; RV32IB-NEXT: or a3, t0, a3 -; RV32IB-NEXT: srl a1, a1, a4 -; RV32IB-NEXT: slti a4, a4, 0 -; RV32IB-NEXT: cmov a1, a4, a3, a1 -; RV32IB-NEXT: sll a0, a0, a2 -; RV32IB-NEXT: srai a2, t1, 31 -; RV32IB-NEXT: and a0, a2, a0 -; RV32IB-NEXT: or a0, a0, a1 -; RV32IB-NEXT: mv a1, a7 +; RV32IB-NEXT: bexti a3, a2, 5 +; RV32IB-NEXT: cmov a4, a3, a0, a1 +; RV32IB-NEXT: cmov a1, a3, a1, a0 +; RV32IB-NEXT: andi a2, a2, 31 +; RV32IB-NEXT: fsl a0, a1, a4, a2 +; RV32IB-NEXT: fsl a1, a4, a1, a2 ; RV32IB-NEXT: ret ; ; RV32IBB-LABEL: rol_i64: ; RV32IBB: # %bb.0: -; RV32IBB-NEXT: mv a7, a1 -; RV32IBB-NEXT: andi a1, a2, 63 -; RV32IBB-NEXT: addi t0, a1, -32 -; RV32IBB-NEXT: addi a6, zero, 31 -; RV32IBB-NEXT: bltz t0, .LBB7_2 +; RV32IBB-NEXT: srli a3, a2, 5 +; RV32IBB-NEXT: andi a3, a3, 1 +; RV32IBB-NEXT: mv a4, a1 +; RV32IBB-NEXT: bnez a3, .LBB7_2 ; RV32IBB-NEXT: # %bb.1: -; RV32IBB-NEXT: sll a1, a0, t0 -; RV32IBB-NEXT: j .LBB7_3 +; RV32IBB-NEXT: mv a4, a0 ; RV32IBB-NEXT: .LBB7_2: -; RV32IBB-NEXT: sll a3, a7, a2 -; RV32IBB-NEXT: sub a1, a6, a1 -; RV32IBB-NEXT: srli a4, a0, 1 -; RV32IBB-NEXT: srl a1, a4, a1 -; RV32IBB-NEXT: or a1, a3, a1 -; RV32IBB-NEXT: .LBB7_3: -; RV32IBB-NEXT: neg a5, a2 -; RV32IBB-NEXT: andi a3, a5, 63 -; RV32IBB-NEXT: addi a4, a3, -32 -; RV32IBB-NEXT: bltz a4, .LBB7_5 -; RV32IBB-NEXT: # %bb.4: -; RV32IBB-NEXT: srl a3, a7, a4 -; RV32IBB-NEXT: bltz t0, .LBB7_6 -; RV32IBB-NEXT: j .LBB7_7 -; RV32IBB-NEXT: .LBB7_5: -; RV32IBB-NEXT: srl a4, a7, a5 -; RV32IBB-NEXT: or a1, a1, a4 -; RV32IBB-NEXT: srl a4, a0, a5 -; RV32IBB-NEXT: sub a3, a6, a3 -; RV32IBB-NEXT: slli a5, a7, 1 -; RV32IBB-NEXT: sll a3, a5, a3 -; RV32IBB-NEXT: or a3, a4, a3 -; RV32IBB-NEXT: bgez t0, .LBB7_7 -; RV32IBB-NEXT: .LBB7_6: +; RV32IBB-NEXT: sll a6, a4, a2 +; RV32IBB-NEXT: bnez a3, .LBB7_4 +; RV32IBB-NEXT: # %bb.3: +; RV32IBB-NEXT: mv a0, a1 +; RV32IBB-NEXT: .LBB7_4: +; RV32IBB-NEXT: srli a1, a0, 1 +; RV32IBB-NEXT: not a5, a2 +; RV32IBB-NEXT: srl a1, a1, a5 +; RV32IBB-NEXT: or a3, a6, a1 ; RV32IBB-NEXT: sll a0, a0, a2 -; RV32IBB-NEXT: or a3, a3, a0 -; RV32IBB-NEXT: .LBB7_7: +; RV32IBB-NEXT: srli a1, a4, 1 +; RV32IBB-NEXT: srl a1, a1, a5 +; RV32IBB-NEXT: or a1, a0, a1 ; RV32IBB-NEXT: mv a0, a3 ; RV32IBB-NEXT: ret ; ; RV32IBP-LABEL: rol_i64: ; RV32IBP: # %bb.0: -; RV32IBP-NEXT: mv a7, a1 -; RV32IBP-NEXT: andi a1, a2, 63 -; RV32IBP-NEXT: addi t0, a1, -32 -; RV32IBP-NEXT: addi a6, zero, 31 -; RV32IBP-NEXT: bltz t0, .LBB7_2 +; RV32IBP-NEXT: srli a3, a2, 5 +; RV32IBP-NEXT: andi a3, a3, 1 +; RV32IBP-NEXT: mv a4, a1 +; RV32IBP-NEXT: bnez a3, .LBB7_2 ; RV32IBP-NEXT: # %bb.1: -; RV32IBP-NEXT: sll a1, a0, t0 -; RV32IBP-NEXT: j .LBB7_3 +; RV32IBP-NEXT: mv a4, a0 ; RV32IBP-NEXT: .LBB7_2: -; RV32IBP-NEXT: sll a3, a7, a2 -; RV32IBP-NEXT: sub a1, a6, a1 -; RV32IBP-NEXT: srli a4, a0, 1 -; RV32IBP-NEXT: srl a1, a4, a1 -; RV32IBP-NEXT: or a1, a3, a1 -; RV32IBP-NEXT: .LBB7_3: -; RV32IBP-NEXT: neg a5, a2 -; RV32IBP-NEXT: andi a3, a5, 63 -; RV32IBP-NEXT: addi a4, a3, -32 -; RV32IBP-NEXT: bltz a4, .LBB7_5 -; RV32IBP-NEXT: # %bb.4: -; RV32IBP-NEXT: srl a3, a7, a4 -; RV32IBP-NEXT: bltz t0, .LBB7_6 -; RV32IBP-NEXT: j .LBB7_7 -; RV32IBP-NEXT: .LBB7_5: -; RV32IBP-NEXT: srl a4, a7, a5 -; RV32IBP-NEXT: or a1, a1, a4 -; RV32IBP-NEXT: srl a4, a0, a5 -; RV32IBP-NEXT: sub a3, a6, a3 -; RV32IBP-NEXT: slli a5, a7, 1 -; RV32IBP-NEXT: sll a3, a5, a3 -; RV32IBP-NEXT: or a3, a4, a3 -; RV32IBP-NEXT: bgez t0, .LBB7_7 -; RV32IBP-NEXT: .LBB7_6: +; RV32IBP-NEXT: sll a6, a4, a2 +; RV32IBP-NEXT: bnez a3, .LBB7_4 +; RV32IBP-NEXT: # %bb.3: +; RV32IBP-NEXT: mv a0, a1 +; RV32IBP-NEXT: .LBB7_4: +; RV32IBP-NEXT: srli a1, a0, 1 +; RV32IBP-NEXT: not a5, a2 +; RV32IBP-NEXT: srl a1, a1, a5 +; RV32IBP-NEXT: or a3, a6, a1 ; RV32IBP-NEXT: sll a0, a0, a2 -; RV32IBP-NEXT: or a3, a3, a0 -; RV32IBP-NEXT: .LBB7_7: +; RV32IBP-NEXT: srli a1, a4, 1 +; RV32IBP-NEXT: srl a1, a1, a5 +; RV32IBP-NEXT: or a1, a0, a1 ; RV32IBP-NEXT: mv a0, a3 ; RV32IBP-NEXT: ret %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b) @@ -416,160 +343,87 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: ror_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: mv t0, a0 -; RV32I-NEXT: andi a0, a2, 63 -; RV32I-NEXT: addi a7, a0, -32 -; RV32I-NEXT: addi a6, zero, 31 -; RV32I-NEXT: bltz a7, .LBB9_2 +; RV32I-NEXT: srli a3, a2, 5 +; RV32I-NEXT: andi a3, a3, 1 +; RV32I-NEXT: mv a4, a1 +; RV32I-NEXT: bnez a3, .LBB9_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl a0, a1, a7 -; RV32I-NEXT: j .LBB9_3 +; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: .LBB9_2: -; RV32I-NEXT: srl a3, t0, a2 -; RV32I-NEXT: sub a0, a6, a0 -; RV32I-NEXT: slli a4, a1, 1 -; RV32I-NEXT: sll a0, a4, a0 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: .LBB9_3: -; RV32I-NEXT: neg a5, a2 -; RV32I-NEXT: andi a4, a5, 63 -; RV32I-NEXT: addi a3, a4, -32 -; RV32I-NEXT: bltz a3, .LBB9_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll a3, t0, a3 -; RV32I-NEXT: bltz a7, .LBB9_6 -; RV32I-NEXT: j .LBB9_7 -; RV32I-NEXT: .LBB9_5: -; RV32I-NEXT: sll a3, t0, a5 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: sll a3, a1, a5 -; RV32I-NEXT: sub a4, a6, a4 -; RV32I-NEXT: srli a5, t0, 1 -; RV32I-NEXT: srl a4, a5, a4 -; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: bgez a7, .LBB9_7 -; RV32I-NEXT: .LBB9_6: -; RV32I-NEXT: srl a1, a1, a2 -; RV32I-NEXT: or a3, a3, a1 -; RV32I-NEXT: .LBB9_7: -; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: srl a6, a4, a2 +; RV32I-NEXT: bnez a3, .LBB9_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: .LBB9_4: +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: not a5, a2 +; RV32I-NEXT: sll a1, a1, a5 +; RV32I-NEXT: or a3, a1, a6 +; RV32I-NEXT: srl a0, a0, a2 +; RV32I-NEXT: slli a1, a4, 1 +; RV32I-NEXT: sll a1, a1, a5 +; RV32I-NEXT: or a1, a1, a0 +; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: ret ; ; RV32IB-LABEL: ror_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: srl a7, a0, a2 -; RV32IB-NEXT: andi a4, a2, 63 -; RV32IB-NEXT: addi a6, zero, 31 -; RV32IB-NEXT: sub a5, a6, a4 -; RV32IB-NEXT: slli a3, a1, 1 -; RV32IB-NEXT: sll a3, a3, a5 -; RV32IB-NEXT: or a7, a7, a3 -; RV32IB-NEXT: addi t1, a4, -32 -; RV32IB-NEXT: srl a5, a1, t1 -; RV32IB-NEXT: slti a3, t1, 0 -; RV32IB-NEXT: cmov a7, a3, a7, a5 -; RV32IB-NEXT: neg a5, a2 -; RV32IB-NEXT: sll t0, a0, a5 -; RV32IB-NEXT: andi t2, a5, 63 -; RV32IB-NEXT: addi a4, t2, -32 -; RV32IB-NEXT: srai a3, a4, 31 -; RV32IB-NEXT: and a3, a3, t0 -; RV32IB-NEXT: or a7, a7, a3 -; RV32IB-NEXT: sll t0, a1, a5 -; RV32IB-NEXT: sub a5, a6, t2 -; RV32IB-NEXT: srli a3, a0, 1 -; RV32IB-NEXT: srl a3, a3, a5 -; RV32IB-NEXT: or a3, t0, a3 -; RV32IB-NEXT: sll a0, a0, a4 -; RV32IB-NEXT: slti a4, a4, 0 -; RV32IB-NEXT: cmov a0, a4, a3, a0 -; RV32IB-NEXT: srl a1, a1, a2 -; RV32IB-NEXT: srai a2, t1, 31 -; RV32IB-NEXT: and a1, a2, a1 -; RV32IB-NEXT: or a1, a1, a0 -; RV32IB-NEXT: mv a0, a7 +; RV32IB-NEXT: bexti a3, a2, 5 +; RV32IB-NEXT: cmov a4, a3, a0, a1 +; RV32IB-NEXT: cmov a1, a3, a1, a0 +; RV32IB-NEXT: andi a2, a2, 31 +; RV32IB-NEXT: fsr a0, a1, a4, a2 +; RV32IB-NEXT: fsr a1, a4, a1, a2 ; RV32IB-NEXT: ret ; ; RV32IBB-LABEL: ror_i64: ; RV32IBB: # %bb.0: -; RV32IBB-NEXT: mv t0, a0 -; RV32IBB-NEXT: andi a0, a2, 63 -; RV32IBB-NEXT: addi a7, a0, -32 -; RV32IBB-NEXT: addi a6, zero, 31 -; RV32IBB-NEXT: bltz a7, .LBB9_2 +; RV32IBB-NEXT: srli a3, a2, 5 +; RV32IBB-NEXT: andi a3, a3, 1 +; RV32IBB-NEXT: mv a4, a1 +; RV32IBB-NEXT: bnez a3, .LBB9_2 ; RV32IBB-NEXT: # %bb.1: -; RV32IBB-NEXT: srl a0, a1, a7 -; RV32IBB-NEXT: j .LBB9_3 +; RV32IBB-NEXT: mv a4, a0 ; RV32IBB-NEXT: .LBB9_2: -; RV32IBB-NEXT: srl a3, t0, a2 -; RV32IBB-NEXT: sub a0, a6, a0 -; RV32IBB-NEXT: slli a4, a1, 1 -; RV32IBB-NEXT: sll a0, a4, a0 -; RV32IBB-NEXT: or a0, a3, a0 -; RV32IBB-NEXT: .LBB9_3: -; RV32IBB-NEXT: neg a5, a2 -; RV32IBB-NEXT: andi a4, a5, 63 -; RV32IBB-NEXT: addi a3, a4, -32 -; RV32IBB-NEXT: bltz a3, .LBB9_5 -; RV32IBB-NEXT: # %bb.4: -; RV32IBB-NEXT: sll a3, t0, a3 -; RV32IBB-NEXT: bltz a7, .LBB9_6 -; RV32IBB-NEXT: j .LBB9_7 -; RV32IBB-NEXT: .LBB9_5: -; RV32IBB-NEXT: sll a3, t0, a5 -; RV32IBB-NEXT: or a0, a0, a3 -; RV32IBB-NEXT: sll a3, a1, a5 -; RV32IBB-NEXT: sub a4, a6, a4 -; RV32IBB-NEXT: srli a5, t0, 1 -; RV32IBB-NEXT: srl a4, a5, a4 -; RV32IBB-NEXT: or a3, a3, a4 -; RV32IBB-NEXT: bgez a7, .LBB9_7 -; RV32IBB-NEXT: .LBB9_6: -; RV32IBB-NEXT: srl a1, a1, a2 -; RV32IBB-NEXT: or a3, a3, a1 -; RV32IBB-NEXT: .LBB9_7: -; RV32IBB-NEXT: mv a1, a3 +; RV32IBB-NEXT: srl a6, a4, a2 +; RV32IBB-NEXT: bnez a3, .LBB9_4 +; RV32IBB-NEXT: # %bb.3: +; RV32IBB-NEXT: mv a0, a1 +; RV32IBB-NEXT: .LBB9_4: +; RV32IBB-NEXT: slli a1, a0, 1 +; RV32IBB-NEXT: not a5, a2 +; RV32IBB-NEXT: sll a1, a1, a5 +; RV32IBB-NEXT: or a3, a1, a6 +; RV32IBB-NEXT: srl a0, a0, a2 +; RV32IBB-NEXT: slli a1, a4, 1 +; RV32IBB-NEXT: sll a1, a1, a5 +; RV32IBB-NEXT: or a1, a1, a0 +; RV32IBB-NEXT: mv a0, a3 ; RV32IBB-NEXT: ret ; ; RV32IBP-LABEL: ror_i64: ; RV32IBP: # %bb.0: -; RV32IBP-NEXT: mv t0, a0 -; RV32IBP-NEXT: andi a0, a2, 63 -; RV32IBP-NEXT: addi a7, a0, -32 -; RV32IBP-NEXT: addi a6, zero, 31 -; RV32IBP-NEXT: bltz a7, .LBB9_2 +; RV32IBP-NEXT: srli a3, a2, 5 +; RV32IBP-NEXT: andi a3, a3, 1 +; RV32IBP-NEXT: mv a4, a1 +; RV32IBP-NEXT: bnez a3, .LBB9_2 ; RV32IBP-NEXT: # %bb.1: -; RV32IBP-NEXT: srl a0, a1, a7 -; RV32IBP-NEXT: j .LBB9_3 +; RV32IBP-NEXT: mv a4, a0 ; RV32IBP-NEXT: .LBB9_2: -; RV32IBP-NEXT: srl a3, t0, a2 -; RV32IBP-NEXT: sub a0, a6, a0 -; RV32IBP-NEXT: slli a4, a1, 1 -; RV32IBP-NEXT: sll a0, a4, a0 -; RV32IBP-NEXT: or a0, a3, a0 -; RV32IBP-NEXT: .LBB9_3: -; RV32IBP-NEXT: neg a5, a2 -; RV32IBP-NEXT: andi a4, a5, 63 -; RV32IBP-NEXT: addi a3, a4, -32 -; RV32IBP-NEXT: bltz a3, .LBB9_5 -; RV32IBP-NEXT: # %bb.4: -; RV32IBP-NEXT: sll a3, t0, a3 -; RV32IBP-NEXT: bltz a7, .LBB9_6 -; RV32IBP-NEXT: j .LBB9_7 -; RV32IBP-NEXT: .LBB9_5: -; RV32IBP-NEXT: sll a3, t0, a5 -; RV32IBP-NEXT: or a0, a0, a3 -; RV32IBP-NEXT: sll a3, a1, a5 -; RV32IBP-NEXT: sub a4, a6, a4 -; RV32IBP-NEXT: srli a5, t0, 1 -; RV32IBP-NEXT: srl a4, a5, a4 -; RV32IBP-NEXT: or a3, a3, a4 -; RV32IBP-NEXT: bgez a7, .LBB9_7 -; RV32IBP-NEXT: .LBB9_6: -; RV32IBP-NEXT: srl a1, a1, a2 -; RV32IBP-NEXT: or a3, a3, a1 -; RV32IBP-NEXT: .LBB9_7: -; RV32IBP-NEXT: mv a1, a3 +; RV32IBP-NEXT: srl a6, a4, a2 +; RV32IBP-NEXT: bnez a3, .LBB9_4 +; RV32IBP-NEXT: # %bb.3: +; RV32IBP-NEXT: mv a0, a1 +; RV32IBP-NEXT: .LBB9_4: +; RV32IBP-NEXT: slli a1, a0, 1 +; RV32IBP-NEXT: not a5, a2 +; RV32IBP-NEXT: sll a1, a1, a5 +; RV32IBP-NEXT: or a3, a1, a6 +; RV32IBP-NEXT: srl a0, a0, a2 +; RV32IBP-NEXT: slli a1, a4, 1 +; RV32IBP-NEXT: sll a1, a1, a5 +; RV32IBP-NEXT: or a1, a1, a0 +; RV32IBP-NEXT: mv a0, a3 ; RV32IBP-NEXT: ret %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b) ret i64 %or @@ -630,8 +484,8 @@ define i64 @rori_i64(i64 %a) nounwind { ; RV32I-LABEL: rori_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: slli a2, a1, 31 -; RV32I-NEXT: srli a3, a0, 1 +; RV32I-NEXT: srli a2, a0, 1 +; RV32I-NEXT: slli a3, a1, 31 ; RV32I-NEXT: or a2, a3, a2 ; RV32I-NEXT: srli a1, a1, 1 ; RV32I-NEXT: slli a0, a0, 31 @@ -648,8 +502,8 @@ ; ; RV32IBB-LABEL: rori_i64: ; RV32IBB: # %bb.0: -; RV32IBB-NEXT: slli a2, a1, 31 -; RV32IBB-NEXT: srli a3, a0, 1 +; RV32IBB-NEXT: srli a2, a0, 1 +; RV32IBB-NEXT: slli a3, a1, 31 ; RV32IBB-NEXT: or a2, a3, a2 ; RV32IBB-NEXT: srli a1, a1, 1 ; RV32IBB-NEXT: slli a0, a0, 31 @@ -659,8 +513,8 @@ ; ; RV32IBP-LABEL: rori_i64: ; RV32IBP: # %bb.0: -; RV32IBP-NEXT: slli a2, a1, 31 -; RV32IBP-NEXT: srli a3, a0, 1 +; RV32IBP-NEXT: srli a2, a0, 1 +; RV32IBP-NEXT: slli a3, a1, 31 ; RV32IBP-NEXT: or a2, a3, a2 ; RV32IBP-NEXT: srli a1, a1, 1 ; RV32IBP-NEXT: slli a0, a0, 31 @@ -674,8 +528,8 @@ define i64 @rori_i64_fshr(i64 %a) nounwind { ; RV32I-LABEL: rori_i64_fshr: ; RV32I: # %bb.0: -; RV32I-NEXT: slli a2, a0, 1 -; RV32I-NEXT: srli a3, a1, 31 +; RV32I-NEXT: srli a2, a1, 31 +; RV32I-NEXT: slli a3, a0, 1 ; RV32I-NEXT: or a2, a3, a2 ; RV32I-NEXT: srli a0, a0, 31 ; RV32I-NEXT: slli a1, a1, 1 @@ -692,8 +546,8 @@ ; ; RV32IBB-LABEL: rori_i64_fshr: ; RV32IBB: # %bb.0: -; RV32IBB-NEXT: slli a2, a0, 1 -; RV32IBB-NEXT: srli a3, a1, 31 +; RV32IBB-NEXT: srli a2, a1, 31 +; RV32IBB-NEXT: slli a3, a0, 1 ; RV32IBB-NEXT: or a2, a3, a2 ; RV32IBB-NEXT: srli a0, a0, 31 ; RV32IBB-NEXT: slli a1, a1, 1 @@ -703,8 +557,8 @@ ; ; RV32IBP-LABEL: rori_i64_fshr: ; RV32IBP: # %bb.0: -; RV32IBP-NEXT: slli a2, a0, 1 -; RV32IBP-NEXT: srli a3, a1, 31 +; RV32IBP-NEXT: srli a2, a1, 31 +; RV32IBP-NEXT: slli a3, a0, 1 ; RV32IBP-NEXT: or a2, a3, a2 ; RV32IBP-NEXT: srli a0, a0, 31 ; RV32IBP-NEXT: slli a1, a1, 1 diff --git a/llvm/test/CodeGen/RISCV/rv32zbt.ll b/llvm/test/CodeGen/RISCV/rv32zbt.ll --- a/llvm/test/CodeGen/RISCV/rv32zbt.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbt.ll @@ -443,119 +443,56 @@ define i64 @fshl_i64(i64 %a, i64 %b, i64 %c) nounwind { ; RV32I-LABEL: fshl_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: andi a5, a4, 63 -; RV32I-NEXT: addi a7, a5, -32 -; RV32I-NEXT: addi a6, zero, 31 -; RV32I-NEXT: bltz a7, .LBB13_2 +; RV32I-NEXT: srli a5, a4, 5 +; RV32I-NEXT: andi a5, a5, 1 +; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: bnez a5, .LBB13_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll a1, a0, a7 -; RV32I-NEXT: j .LBB13_3 +; RV32I-NEXT: mv a6, a0 ; RV32I-NEXT: .LBB13_2: -; RV32I-NEXT: sll t0, a1, a4 -; RV32I-NEXT: sub a5, a6, a5 -; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: srl a1, a1, a5 -; RV32I-NEXT: or a1, t0, a1 -; RV32I-NEXT: .LBB13_3: -; RV32I-NEXT: not t2, a4 -; RV32I-NEXT: andi t1, t2, 63 -; RV32I-NEXT: addi a5, t1, -32 -; RV32I-NEXT: srli t0, a3, 1 -; RV32I-NEXT: bltz a5, .LBB13_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a2, t0, a5 -; RV32I-NEXT: bltz a7, .LBB13_6 -; RV32I-NEXT: j .LBB13_7 -; RV32I-NEXT: .LBB13_5: -; RV32I-NEXT: srl a5, t0, t2 -; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: slli a3, a3, 31 +; RV32I-NEXT: sll a7, a6, a4 +; RV32I-NEXT: bnez a5, .LBB13_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: .LBB13_4: ; RV32I-NEXT: srli a2, a2, 1 -; RV32I-NEXT: or a2, a2, a3 -; RV32I-NEXT: srl a2, a2, t2 -; RV32I-NEXT: sub a3, a6, t1 -; RV32I-NEXT: slli a5, t0, 1 -; RV32I-NEXT: sll a3, a5, a3 -; RV32I-NEXT: or a2, a2, a3 -; RV32I-NEXT: bgez a7, .LBB13_7 +; RV32I-NEXT: not a3, a4 +; RV32I-NEXT: srl a2, a2, a3 +; RV32I-NEXT: or a2, a7, a2 +; RV32I-NEXT: bnez a5, .LBB13_6 +; RV32I-NEXT: # %bb.5: +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: .LBB13_6: ; RV32I-NEXT: sll a0, a0, a4 -; RV32I-NEXT: or a2, a2, a0 -; RV32I-NEXT: .LBB13_7: +; RV32I-NEXT: srli a1, a6, 1 +; RV32I-NEXT: srl a1, a1, a3 +; RV32I-NEXT: or a1, a0, a1 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret ; ; RV32IB-LABEL: fshl_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: sll a7, a1, a4 -; RV32IB-NEXT: andi a5, a4, 63 -; RV32IB-NEXT: addi a6, zero, 31 -; RV32IB-NEXT: sub t0, a6, a5 -; RV32IB-NEXT: srli a1, a0, 1 -; RV32IB-NEXT: srl a1, a1, t0 -; RV32IB-NEXT: or a7, a7, a1 -; RV32IB-NEXT: addi t1, a5, -32 -; RV32IB-NEXT: sll t0, a0, t1 -; RV32IB-NEXT: slti a1, t1, 0 -; RV32IB-NEXT: cmov t0, a1, a7, t0 -; RV32IB-NEXT: not a7, a4 -; RV32IB-NEXT: srli t4, a3, 1 -; RV32IB-NEXT: srl t2, t4, a7 -; RV32IB-NEXT: addi a1, zero, 63 -; RV32IB-NEXT: andn t3, a1, a4 -; RV32IB-NEXT: addi a5, t3, -32 -; RV32IB-NEXT: srai a1, a5, 31 -; RV32IB-NEXT: and a1, a1, t2 -; RV32IB-NEXT: or a1, t0, a1 -; RV32IB-NEXT: fsri a2, a2, a3, 1 -; RV32IB-NEXT: srl a7, a2, a7 -; RV32IB-NEXT: sub a3, a6, t3 -; RV32IB-NEXT: slli a2, t4, 1 -; RV32IB-NEXT: sll a2, a2, a3 -; RV32IB-NEXT: or a2, a7, a2 -; RV32IB-NEXT: srl a3, t4, a5 -; RV32IB-NEXT: slti a5, a5, 0 +; RV32IB-NEXT: bexti a5, a4, 5 ; RV32IB-NEXT: cmov a2, a5, a2, a3 -; RV32IB-NEXT: sll a0, a0, a4 -; RV32IB-NEXT: srai a3, t1, 31 -; RV32IB-NEXT: and a0, a3, a0 -; RV32IB-NEXT: or a0, a0, a2 +; RV32IB-NEXT: cmov a3, a5, a3, a0 +; RV32IB-NEXT: andi a4, a4, 31 +; RV32IB-NEXT: fsl a2, a3, a2, a4 +; RV32IB-NEXT: cmov a0, a5, a0, a1 +; RV32IB-NEXT: fsl a1, a0, a3, a4 +; RV32IB-NEXT: mv a0, a2 ; RV32IB-NEXT: ret ; ; RV32IBT-LABEL: fshl_i64: ; RV32IBT: # %bb.0: -; RV32IBT-NEXT: sll a7, a1, a4 -; RV32IBT-NEXT: andi a5, a4, 63 -; RV32IBT-NEXT: addi a6, zero, 31 -; RV32IBT-NEXT: sub t0, a6, a5 -; RV32IBT-NEXT: srli a1, a0, 1 -; RV32IBT-NEXT: srl a1, a1, t0 -; RV32IBT-NEXT: or a7, a7, a1 -; RV32IBT-NEXT: addi t1, a5, -32 -; RV32IBT-NEXT: sll t0, a0, t1 -; RV32IBT-NEXT: slti a1, t1, 0 -; RV32IBT-NEXT: cmov t0, a1, a7, t0 -; RV32IBT-NEXT: not a5, a4 -; RV32IBT-NEXT: srli a7, a3, 1 -; RV32IBT-NEXT: srl t4, a7, a5 -; RV32IBT-NEXT: andi t2, a5, 63 -; RV32IBT-NEXT: addi t3, t2, -32 -; RV32IBT-NEXT: srai a1, t3, 31 -; RV32IBT-NEXT: and a1, a1, t4 -; RV32IBT-NEXT: or a1, t0, a1 -; RV32IBT-NEXT: fsri a2, a2, a3, 1 -; RV32IBT-NEXT: srl a2, a2, a5 -; RV32IBT-NEXT: sub a3, a6, t2 -; RV32IBT-NEXT: slli a5, a7, 1 -; RV32IBT-NEXT: sll a3, a5, a3 -; RV32IBT-NEXT: or a2, a2, a3 -; RV32IBT-NEXT: srl a3, a7, t3 -; RV32IBT-NEXT: slti a5, t3, 0 +; RV32IBT-NEXT: srli a5, a4, 5 +; RV32IBT-NEXT: andi a5, a5, 1 ; RV32IBT-NEXT: cmov a2, a5, a2, a3 -; RV32IBT-NEXT: sll a0, a0, a4 -; RV32IBT-NEXT: srai a3, t1, 31 -; RV32IBT-NEXT: and a0, a3, a0 -; RV32IBT-NEXT: or a0, a0, a2 +; RV32IBT-NEXT: cmov a3, a5, a3, a0 +; RV32IBT-NEXT: andi a4, a4, 31 +; RV32IBT-NEXT: fsl a2, a3, a2, a4 +; RV32IBT-NEXT: cmov a0, a5, a0, a1 +; RV32IBT-NEXT: fsl a1, a0, a3, a4 +; RV32IBT-NEXT: mv a0, a2 ; RV32IBT-NEXT: ret %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) ret i64 %1 @@ -599,125 +536,57 @@ define i64 @fshr_i64(i64 %a, i64 %b, i64 %c) nounwind { ; RV32I-LABEL: fshr_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: mv t0, a0 -; RV32I-NEXT: andi a0, a4, 63 -; RV32I-NEXT: addi a6, a0, -32 -; RV32I-NEXT: addi a7, zero, 31 -; RV32I-NEXT: bltz a6, .LBB15_2 +; RV32I-NEXT: srli a5, a4, 5 +; RV32I-NEXT: andi a5, a5, 1 +; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: bnez a5, .LBB15_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl a0, a3, a6 -; RV32I-NEXT: j .LBB15_3 +; RV32I-NEXT: mv a6, a2 ; RV32I-NEXT: .LBB15_2: -; RV32I-NEXT: srl a2, a2, a4 -; RV32I-NEXT: sub a0, a7, a0 -; RV32I-NEXT: slli a5, a3, 1 -; RV32I-NEXT: sll a0, a5, a0 -; RV32I-NEXT: or a0, a2, a0 -; RV32I-NEXT: .LBB15_3: -; RV32I-NEXT: not t2, a4 -; RV32I-NEXT: andi a5, t2, 63 -; RV32I-NEXT: addi a2, a5, -32 -; RV32I-NEXT: slli t1, t0, 1 -; RV32I-NEXT: bltz a2, .LBB15_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll a1, t1, a2 -; RV32I-NEXT: bltz a6, .LBB15_6 -; RV32I-NEXT: j .LBB15_7 -; RV32I-NEXT: .LBB15_5: -; RV32I-NEXT: sll a2, t1, t2 -; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: lui a2, 524288 -; RV32I-NEXT: addi a2, a2, -1 -; RV32I-NEXT: and a2, t0, a2 -; RV32I-NEXT: sub a5, a7, a5 -; RV32I-NEXT: srl a2, a2, a5 -; RV32I-NEXT: srli a5, t0, 31 -; RV32I-NEXT: slli a1, a1, 1 -; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: sll a1, a1, t2 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: bgez a6, .LBB15_7 +; RV32I-NEXT: srl a6, a6, a4 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bnez a5, .LBB15_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: .LBB15_4: +; RV32I-NEXT: slli a7, a2, 1 +; RV32I-NEXT: not t0, a4 +; RV32I-NEXT: sll a3, a7, t0 +; RV32I-NEXT: or a6, a3, a6 +; RV32I-NEXT: srl a4, a2, a4 +; RV32I-NEXT: bnez a5, .LBB15_6 +; RV32I-NEXT: # %bb.5: +; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: .LBB15_6: -; RV32I-NEXT: srl a2, a3, a4 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: .LBB15_7: +; RV32I-NEXT: slli a0, a1, 1 +; RV32I-NEXT: sll a0, a0, t0 +; RV32I-NEXT: or a1, a0, a4 +; RV32I-NEXT: mv a0, a6 ; RV32I-NEXT: ret ; ; RV32IB-LABEL: fshr_i64: ; RV32IB: # %bb.0: -; RV32IB-NEXT: srl a7, a2, a4 -; RV32IB-NEXT: andi a5, a4, 63 -; RV32IB-NEXT: addi a6, zero, 31 -; RV32IB-NEXT: sub t0, a6, a5 -; RV32IB-NEXT: slli a2, a3, 1 -; RV32IB-NEXT: sll a2, a2, t0 -; RV32IB-NEXT: or a7, a7, a2 -; RV32IB-NEXT: addi t2, a5, -32 -; RV32IB-NEXT: srl t0, a3, t2 -; RV32IB-NEXT: slti a2, t2, 0 -; RV32IB-NEXT: cmov a7, a2, a7, t0 -; RV32IB-NEXT: not t3, a4 -; RV32IB-NEXT: slli t0, a0, 1 -; RV32IB-NEXT: sll t1, t0, t3 -; RV32IB-NEXT: addi a5, zero, 63 -; RV32IB-NEXT: andn t4, a5, a4 -; RV32IB-NEXT: addi a2, t4, -32 -; RV32IB-NEXT: srai a5, a2, 31 -; RV32IB-NEXT: and a5, a5, t1 -; RV32IB-NEXT: or a7, a5, a7 -; RV32IB-NEXT: fsri a1, a0, a1, 31 -; RV32IB-NEXT: sll a1, a1, t3 -; RV32IB-NEXT: sub a5, a6, t4 -; RV32IB-NEXT: bclri a0, a0, 31 -; RV32IB-NEXT: srl a0, a0, a5 -; RV32IB-NEXT: or a0, a1, a0 -; RV32IB-NEXT: sll a1, t0, a2 -; RV32IB-NEXT: slti a2, a2, 0 -; RV32IB-NEXT: cmov a0, a2, a0, a1 -; RV32IB-NEXT: srl a1, a3, a4 -; RV32IB-NEXT: srai a2, t2, 31 -; RV32IB-NEXT: and a1, a2, a1 -; RV32IB-NEXT: or a1, a0, a1 -; RV32IB-NEXT: mv a0, a7 +; RV32IB-NEXT: bexti a5, a4, 5 +; RV32IB-NEXT: cmov a6, a5, a0, a3 +; RV32IB-NEXT: cmov a2, a5, a3, a2 +; RV32IB-NEXT: andi a3, a4, 31 +; RV32IB-NEXT: fsr a2, a2, a6, a3 +; RV32IB-NEXT: cmov a0, a5, a1, a0 +; RV32IB-NEXT: fsr a1, a6, a0, a3 +; RV32IB-NEXT: mv a0, a2 ; RV32IB-NEXT: ret ; ; RV32IBT-LABEL: fshr_i64: ; RV32IBT: # %bb.0: -; RV32IBT-NEXT: srl a7, a2, a4 -; RV32IBT-NEXT: andi a5, a4, 63 -; RV32IBT-NEXT: addi a6, zero, 31 -; RV32IBT-NEXT: sub t0, a6, a5 -; RV32IBT-NEXT: slli a2, a3, 1 -; RV32IBT-NEXT: sll a2, a2, t0 -; RV32IBT-NEXT: or a7, a7, a2 -; RV32IBT-NEXT: addi t2, a5, -32 -; RV32IBT-NEXT: srl t0, a3, t2 -; RV32IBT-NEXT: slti a2, t2, 0 -; RV32IBT-NEXT: cmov a7, a2, a7, t0 -; RV32IBT-NEXT: not t4, a4 -; RV32IBT-NEXT: slli t0, a0, 1 -; RV32IBT-NEXT: sll t1, t0, t4 -; RV32IBT-NEXT: andi t3, t4, 63 -; RV32IBT-NEXT: addi a5, t3, -32 -; RV32IBT-NEXT: srai a2, a5, 31 -; RV32IBT-NEXT: and a2, a2, t1 -; RV32IBT-NEXT: or a7, a2, a7 -; RV32IBT-NEXT: lui a2, 524288 -; RV32IBT-NEXT: addi a2, a2, -1 -; RV32IBT-NEXT: and t1, a0, a2 -; RV32IBT-NEXT: sub a2, a6, t3 -; RV32IBT-NEXT: srl a2, t1, a2 -; RV32IBT-NEXT: fsri a0, a0, a1, 31 -; RV32IBT-NEXT: sll a0, a0, t4 -; RV32IBT-NEXT: or a0, a0, a2 -; RV32IBT-NEXT: sll a1, t0, a5 -; RV32IBT-NEXT: slti a2, a5, 0 -; RV32IBT-NEXT: cmov a0, a2, a0, a1 -; RV32IBT-NEXT: srl a1, a3, a4 -; RV32IBT-NEXT: srai a2, t2, 31 -; RV32IBT-NEXT: and a1, a2, a1 -; RV32IBT-NEXT: or a1, a0, a1 -; RV32IBT-NEXT: mv a0, a7 +; RV32IBT-NEXT: srli a5, a4, 5 +; RV32IBT-NEXT: andi a5, a5, 1 +; RV32IBT-NEXT: cmov a6, a5, a0, a3 +; RV32IBT-NEXT: cmov a2, a5, a3, a2 +; RV32IBT-NEXT: andi a3, a4, 31 +; RV32IBT-NEXT: fsr a2, a2, a6, a3 +; RV32IBT-NEXT: cmov a0, a5, a1, a0 +; RV32IBT-NEXT: fsr a1, a6, a0, a3 +; RV32IBT-NEXT: mv a0, a2 ; RV32IBT-NEXT: ret %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) ret i64 %1 @@ -747,8 +616,8 @@ define i64 @fshri_i64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: fshri_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: slli a1, a3, 27 -; RV32I-NEXT: srli a2, a2, 5 +; RV32I-NEXT: srli a1, a2, 5 +; RV32I-NEXT: slli a2, a3, 27 ; RV32I-NEXT: or a2, a2, a1 ; RV32I-NEXT: srli a1, a3, 5 ; RV32I-NEXT: slli a0, a0, 27 diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll --- a/llvm/test/CodeGen/X86/fshl.ll +++ b/llvm/test/CodeGen/X86/fshl.ll @@ -178,102 +178,62 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind { ; X86-FAST-LABEL: var_shift_i64: ; X86-FAST: # %bb.0: -; X86-FAST-NEXT: pushl %ebx ; X86-FAST-NEXT: pushl %edi ; X86-FAST-NEXT: pushl %esi -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %ch -; X86-FAST-NEXT: movb %ch, %cl -; X86-FAST-NEXT: notb %cl -; X86-FAST-NEXT: shrdl $1, %edi, %esi -; X86-FAST-NEXT: shrl %edi -; X86-FAST-NEXT: shrdl %cl, %edi, %esi -; X86-FAST-NEXT: shrl %cl, %edi +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-FAST-NEXT: testb $32, %cl -; X86-FAST-NEXT: je .LBB5_2 -; X86-FAST-NEXT: # %bb.1: -; X86-FAST-NEXT: movl %edi, %esi -; X86-FAST-NEXT: xorl %edi, %edi -; X86-FAST-NEXT: .LBB5_2: -; X86-FAST-NEXT: movl %ebx, %eax -; X86-FAST-NEXT: movb %ch, %cl -; X86-FAST-NEXT: shll %cl, %eax -; X86-FAST-NEXT: shldl %cl, %ebx, %edx -; X86-FAST-NEXT: testb $32, %ch -; X86-FAST-NEXT: je .LBB5_4 -; X86-FAST-NEXT: # %bb.3: -; X86-FAST-NEXT: movl %eax, %edx -; X86-FAST-NEXT: xorl %eax, %eax -; X86-FAST-NEXT: .LBB5_4: -; X86-FAST-NEXT: orl %edi, %edx -; X86-FAST-NEXT: orl %esi, %eax +; X86-FAST-NEXT: jne .LBB5_1 +; X86-FAST-NEXT: # %bb.2: +; X86-FAST-NEXT: movl %edx, %edi +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-FAST-NEXT: jmp .LBB5_3 +; X86-FAST-NEXT: .LBB5_1: +; X86-FAST-NEXT: movl %esi, %edi +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-FAST-NEXT: .LBB5_3: +; X86-FAST-NEXT: movl %edi, %eax +; X86-FAST-NEXT: shldl %cl, %esi, %eax +; X86-FAST-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-FAST-NEXT: shldl %cl, %edi, %edx ; X86-FAST-NEXT: popl %esi ; X86-FAST-NEXT: popl %edi -; X86-FAST-NEXT: popl %ebx ; X86-FAST-NEXT: retl ; ; X86-SLOW-LABEL: var_shift_i64: ; X86-SLOW: # %bb.0: -; X86-SLOW-NEXT: pushl %ebp ; X86-SLOW-NEXT: pushl %ebx ; X86-SLOW-NEXT: pushl %edi ; X86-SLOW-NEXT: pushl %esi +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-SLOW-NEXT: shrl %eax -; X86-SLOW-NEXT: movl %esi, %edi -; X86-SLOW-NEXT: shll $31, %edi -; X86-SLOW-NEXT: orl %eax, %edi -; X86-SLOW-NEXT: movl %ecx, %eax -; X86-SLOW-NEXT: movb %cl, %ch -; X86-SLOW-NEXT: notb %ch -; X86-SLOW-NEXT: movb %ch, %cl -; X86-SLOW-NEXT: shrl %cl, %edi -; X86-SLOW-NEXT: shrl %esi -; X86-SLOW-NEXT: leal (%esi,%esi), %ebp -; X86-SLOW-NEXT: movb %al, %cl -; X86-SLOW-NEXT: shll %cl, %ebp -; X86-SLOW-NEXT: shll %cl, %ebx -; X86-SLOW-NEXT: movl %edx, %eax -; X86-SLOW-NEXT: shrl %eax -; X86-SLOW-NEXT: movb %ch, %cl -; X86-SLOW-NEXT: shrl %cl, %eax -; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-SLOW-NEXT: shll %cl, %edx -; X86-SLOW-NEXT: testb $32, {{[0-9]+}}(%esp) +; X86-SLOW-NEXT: testb $32, %bl ; X86-SLOW-NEXT: jne .LBB5_1 ; X86-SLOW-NEXT: # %bb.2: -; X86-SLOW-NEXT: orl %eax, %ebx +; X86-SLOW-NEXT: movl %edx, %esi +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SLOW-NEXT: jmp .LBB5_3 ; X86-SLOW-NEXT: .LBB5_1: -; X86-SLOW-NEXT: movl %edx, %ebx -; X86-SLOW-NEXT: xorl %edx, %edx +; X86-SLOW-NEXT: movl %eax, %esi +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SLOW-NEXT: .LBB5_3: -; X86-SLOW-NEXT: movb %ch, %cl +; X86-SLOW-NEXT: movl %esi, %edi +; X86-SLOW-NEXT: movl %ebx, %ecx +; X86-SLOW-NEXT: shll %cl, %edi +; X86-SLOW-NEXT: shrl %eax +; X86-SLOW-NEXT: notb %cl +; X86-SLOW-NEXT: shrl %cl, %eax +; X86-SLOW-NEXT: orl %edi, %eax +; X86-SLOW-NEXT: shrl %esi ; X86-SLOW-NEXT: shrl %cl, %esi -; X86-SLOW-NEXT: testb $32, %ch -; X86-SLOW-NEXT: jne .LBB5_4 -; X86-SLOW-NEXT: # %bb.5: -; X86-SLOW-NEXT: orl %edi, %ebp -; X86-SLOW-NEXT: jmp .LBB5_6 -; X86-SLOW-NEXT: .LBB5_4: -; X86-SLOW-NEXT: movl %esi, %ebp -; X86-SLOW-NEXT: xorl %esi, %esi -; X86-SLOW-NEXT: .LBB5_6: -; X86-SLOW-NEXT: orl %ebp, %edx -; X86-SLOW-NEXT: orl %esi, %ebx -; X86-SLOW-NEXT: movl %edx, %eax -; X86-SLOW-NEXT: movl %ebx, %edx +; X86-SLOW-NEXT: movl %ebx, %ecx +; X86-SLOW-NEXT: shll %cl, %edx +; X86-SLOW-NEXT: orl %esi, %edx ; X86-SLOW-NEXT: popl %esi ; X86-SLOW-NEXT: popl %edi ; X86-SLOW-NEXT: popl %ebx -; X86-SLOW-NEXT: popl %ebp ; X86-SLOW-NEXT: retl ; ; X64-FAST-LABEL: var_shift_i64: diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll --- a/llvm/test/CodeGen/X86/fshr.ll +++ b/llvm/test/CodeGen/X86/fshr.ll @@ -175,106 +175,60 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind { ; X86-FAST-LABEL: var_shift_i64: ; X86-FAST: # %bb.0: -; X86-FAST-NEXT: pushl %ebp -; X86-FAST-NEXT: pushl %ebx -; X86-FAST-NEXT: pushl %edi ; X86-FAST-NEXT: pushl %esi ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-FAST-NEXT: movb %bl, %ch -; X86-FAST-NEXT: notb %ch -; X86-FAST-NEXT: shldl $1, %eax, %edx -; X86-FAST-NEXT: addl %eax, %eax -; X86-FAST-NEXT: movb %ch, %cl -; X86-FAST-NEXT: shldl %cl, %eax, %edx -; X86-FAST-NEXT: movl %ebp, %edi -; X86-FAST-NEXT: movb %bl, %cl -; X86-FAST-NEXT: shrl %cl, %edi -; X86-FAST-NEXT: shrdl %cl, %ebp, %esi -; X86-FAST-NEXT: testb $32, %bl -; X86-FAST-NEXT: je .LBB5_2 -; X86-FAST-NEXT: # %bb.1: -; X86-FAST-NEXT: movl %edi, %esi -; X86-FAST-NEXT: xorl %edi, %edi -; X86-FAST-NEXT: .LBB5_2: -; X86-FAST-NEXT: movb %ch, %cl -; X86-FAST-NEXT: shll %cl, %eax -; X86-FAST-NEXT: testb $32, %ch -; X86-FAST-NEXT: je .LBB5_4 -; X86-FAST-NEXT: # %bb.3: +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-FAST-NEXT: testb $32, %cl +; X86-FAST-NEXT: jne .LBB5_1 +; X86-FAST-NEXT: # %bb.2: ; X86-FAST-NEXT: movl %eax, %edx -; X86-FAST-NEXT: xorl %eax, %eax -; X86-FAST-NEXT: .LBB5_4: -; X86-FAST-NEXT: orl %edi, %edx -; X86-FAST-NEXT: orl %esi, %eax +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-FAST-NEXT: jmp .LBB5_3 +; X86-FAST-NEXT: .LBB5_1: +; X86-FAST-NEXT: movl %esi, %edx +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-FAST-NEXT: .LBB5_3: +; X86-FAST-NEXT: shrdl %cl, %edx, %eax +; X86-FAST-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-FAST-NEXT: shrdl %cl, %esi, %edx ; X86-FAST-NEXT: popl %esi -; X86-FAST-NEXT: popl %edi -; X86-FAST-NEXT: popl %ebx -; X86-FAST-NEXT: popl %ebp ; X86-FAST-NEXT: retl ; ; X86-SLOW-LABEL: var_shift_i64: ; X86-SLOW: # %bb.0: -; X86-SLOW-NEXT: pushl %ebp ; X86-SLOW-NEXT: pushl %ebx ; X86-SLOW-NEXT: pushl %edi ; X86-SLOW-NEXT: pushl %esi -; X86-SLOW-NEXT: pushl %eax -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SLOW-NEXT: movl %eax, %edi -; X86-SLOW-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF -; X86-SLOW-NEXT: movl %ebx, %ecx -; X86-SLOW-NEXT: shrl %cl, %edi -; X86-SLOW-NEXT: movl %eax, %ecx -; X86-SLOW-NEXT: shrl $31, %ecx -; X86-SLOW-NEXT: leal (%ecx,%edx,2), %edx -; X86-SLOW-NEXT: movb %bl, %ch -; X86-SLOW-NEXT: notb %ch -; X86-SLOW-NEXT: movb %ch, %cl -; X86-SLOW-NEXT: shll %cl, %edx -; X86-SLOW-NEXT: movb %bl, %cl -; X86-SLOW-NEXT: shrl %cl, %ebp -; X86-SLOW-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X86-SLOW-NEXT: leal (%esi,%esi), %ebp -; X86-SLOW-NEXT: movb %ch, %cl -; X86-SLOW-NEXT: shll %cl, %ebp -; X86-SLOW-NEXT: movb %bl, %cl -; X86-SLOW-NEXT: shrl %cl, %esi +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-SLOW-NEXT: testb $32, %bl ; X86-SLOW-NEXT: jne .LBB5_1 ; X86-SLOW-NEXT: # %bb.2: -; X86-SLOW-NEXT: orl (%esp), %ebp # 4-byte Folded Reload +; X86-SLOW-NEXT: movl %eax, %esi +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SLOW-NEXT: jmp .LBB5_3 ; X86-SLOW-NEXT: .LBB5_1: -; X86-SLOW-NEXT: movl %esi, %ebp -; X86-SLOW-NEXT: xorl %esi, %esi +; X86-SLOW-NEXT: movl %edx, %esi +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SLOW-NEXT: .LBB5_3: -; X86-SLOW-NEXT: addl %eax, %eax +; X86-SLOW-NEXT: leal (%esi,%esi), %edi +; X86-SLOW-NEXT: movb %bl, %ch +; X86-SLOW-NEXT: notb %ch ; X86-SLOW-NEXT: movb %ch, %cl -; X86-SLOW-NEXT: shll %cl, %eax -; X86-SLOW-NEXT: testb $32, %ch -; X86-SLOW-NEXT: jne .LBB5_4 -; X86-SLOW-NEXT: # %bb.5: -; X86-SLOW-NEXT: orl %edi, %edx -; X86-SLOW-NEXT: jmp .LBB5_6 -; X86-SLOW-NEXT: .LBB5_4: -; X86-SLOW-NEXT: movl %eax, %edx -; X86-SLOW-NEXT: xorl %eax, %eax -; X86-SLOW-NEXT: .LBB5_6: +; X86-SLOW-NEXT: shll %cl, %edi +; X86-SLOW-NEXT: movb %bl, %cl +; X86-SLOW-NEXT: shrl %cl, %eax +; X86-SLOW-NEXT: orl %edi, %eax +; X86-SLOW-NEXT: shrl %cl, %esi +; X86-SLOW-NEXT: addl %edx, %edx +; X86-SLOW-NEXT: movb %ch, %cl +; X86-SLOW-NEXT: shll %cl, %edx ; X86-SLOW-NEXT: orl %esi, %edx -; X86-SLOW-NEXT: orl %ebp, %eax -; X86-SLOW-NEXT: addl $4, %esp ; X86-SLOW-NEXT: popl %esi ; X86-SLOW-NEXT: popl %edi ; X86-SLOW-NEXT: popl %ebx -; X86-SLOW-NEXT: popl %ebp ; X86-SLOW-NEXT: retl ; ; X64-FAST-LABEL: var_shift_i64: @@ -400,9 +354,9 @@ define i64 @const_shift_i64(i64 %x, i64 %y) nounwind { ; X86-FAST-LABEL: const_shift_i64: ; X86-FAST: # %bb.0: +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-FAST-NEXT: shldl $25, %ecx, %edx ; X86-FAST-NEXT: shrdl $7, %ecx, %eax ; X86-FAST-NEXT: retl diff --git a/llvm/test/CodeGen/X86/funnel-shift-rot.ll b/llvm/test/CodeGen/X86/funnel-shift-rot.ll --- a/llvm/test/CodeGen/X86/funnel-shift-rot.ll +++ b/llvm/test/CodeGen/X86/funnel-shift-rot.ll @@ -276,34 +276,19 @@ define i64 @rotr_i64(i64 %x, i64 %z) nounwind { ; X32-SSE2-LABEL: rotr_i64: ; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: pushl %ebp -; X32-SSE2-NEXT: pushl %ebx -; X32-SSE2-NEXT: pushl %edi ; X32-SSE2-NEXT: pushl %esi -; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-SSE2-NEXT: movl %edx, %edi -; X32-SSE2-NEXT: shrl %cl, %edi -; X32-SSE2-NEXT: movl %esi, %ebx -; X32-SSE2-NEXT: shrdl %cl, %edx, %ebx -; X32-SSE2-NEXT: xorl %ebp, %ebp +; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-SSE2-NEXT: testb $32, %cl -; X32-SSE2-NEXT: cmovnel %edi, %ebx -; X32-SSE2-NEXT: cmovnel %ebp, %edi -; X32-SSE2-NEXT: negb %cl +; X32-SSE2-NEXT: movl %eax, %edx +; X32-SSE2-NEXT: cmovnel %esi, %edx +; X32-SSE2-NEXT: cmovnel %eax, %esi ; X32-SSE2-NEXT: movl %esi, %eax -; X32-SSE2-NEXT: shll %cl, %eax -; X32-SSE2-NEXT: shldl %cl, %esi, %edx -; X32-SSE2-NEXT: testb $32, %cl -; X32-SSE2-NEXT: cmovnel %eax, %edx -; X32-SSE2-NEXT: cmovnel %ebp, %eax -; X32-SSE2-NEXT: orl %ebx, %eax -; X32-SSE2-NEXT: orl %edi, %edx +; X32-SSE2-NEXT: shrdl %cl, %edx, %eax +; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-SSE2-NEXT: shrdl %cl, %esi, %edx ; X32-SSE2-NEXT: popl %esi -; X32-SSE2-NEXT: popl %edi -; X32-SSE2-NEXT: popl %ebx -; X32-SSE2-NEXT: popl %ebp ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: rotr_i64: diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll --- a/llvm/test/CodeGen/X86/funnel-shift.ll +++ b/llvm/test/CodeGen/X86/funnel-shift.ll @@ -41,7 +41,6 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind { ; X32-SSE2-LABEL: fshl_i37: ; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: pushl %ebp ; X32-SSE2-NEXT: pushl %ebx ; X32-SSE2-NEXT: pushl %edi ; X32-SSE2-NEXT: pushl %esi @@ -49,40 +48,31 @@ ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi ; X32-SSE2-NEXT: shldl $27, %ebx, %edi -; X32-SSE2-NEXT: shll $27, %ebx -; X32-SSE2-NEXT: shrdl $1, %edi, %ebx -; X32-SSE2-NEXT: shrl %edi ; X32-SSE2-NEXT: pushl $0 ; X32-SSE2-NEXT: pushl $37 ; X32-SSE2-NEXT: pushl {{[0-9]+}}(%esp) ; X32-SSE2-NEXT: pushl {{[0-9]+}}(%esp) ; X32-SSE2-NEXT: calll __umoddi3 ; X32-SSE2-NEXT: addl $16, %esp -; X32-SSE2-NEXT: movl %eax, %edx -; X32-SSE2-NEXT: movl %edx, %ecx -; X32-SSE2-NEXT: notb %cl -; X32-SSE2-NEXT: shrdl %cl, %edi, %ebx -; X32-SSE2-NEXT: shrl %cl, %edi -; X32-SSE2-NEXT: xorl %eax, %eax +; X32-SSE2-NEXT: movl %eax, %ecx ; X32-SSE2-NEXT: testb $32, %cl -; X32-SSE2-NEXT: cmovnel %edi, %ebx -; X32-SSE2-NEXT: cmovnel %eax, %edi -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: movl %edx, %ecx -; X32-SSE2-NEXT: shll %cl, %eax -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-SSE2-NEXT: shldl %cl, %ebp, %esi -; X32-SSE2-NEXT: testb $32, %dl -; X32-SSE2-NEXT: cmovnel %eax, %esi -; X32-SSE2-NEXT: movl $0, %ecx -; X32-SSE2-NEXT: cmovnel %ecx, %eax -; X32-SSE2-NEXT: orl %ebx, %eax -; X32-SSE2-NEXT: orl %edi, %esi +; X32-SSE2-NEXT: jne .LBB1_1 +; X32-SSE2-NEXT: # %bb.2: +; X32-SSE2-NEXT: movl %edi, %ebx +; X32-SSE2-NEXT: movl %esi, %edi +; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-SSE2-NEXT: jmp .LBB1_3 +; X32-SSE2-NEXT: .LBB1_1: +; X32-SSE2-NEXT: shll $27, %ebx +; X32-SSE2-NEXT: .LBB1_3: +; X32-SSE2-NEXT: movl %edi, %eax +; X32-SSE2-NEXT: shldl %cl, %ebx, %eax +; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-SSE2-NEXT: shldl %cl, %edi, %esi ; X32-SSE2-NEXT: movl %esi, %edx ; X32-SSE2-NEXT: popl %esi ; X32-SSE2-NEXT: popl %edi ; X32-SSE2-NEXT: popl %ebx -; X32-SSE2-NEXT: popl %ebp ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: fshl_i37: @@ -215,50 +205,39 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind { ; X32-SSE2-LABEL: fshr_i37: ; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: pushl %ebp ; X32-SSE2-NEXT: pushl %ebx ; X32-SSE2-NEXT: pushl %edi ; X32-SSE2-NEXT: pushl %esi -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-SSE2-NEXT: shldl $1, %edi, %esi -; X32-SSE2-NEXT: addl %edi, %edi +; X32-SSE2-NEXT: shldl $27, %ebx, %esi ; X32-SSE2-NEXT: pushl $0 ; X32-SSE2-NEXT: pushl $37 ; X32-SSE2-NEXT: pushl {{[0-9]+}}(%esp) ; X32-SSE2-NEXT: pushl {{[0-9]+}}(%esp) ; X32-SSE2-NEXT: calll __umoddi3 ; X32-SSE2-NEXT: addl $16, %esp -; X32-SSE2-NEXT: addb $27, %al -; X32-SSE2-NEXT: movl %eax, %edx -; X32-SSE2-NEXT: notb %dl -; X32-SSE2-NEXT: movl %edx, %ecx -; X32-SSE2-NEXT: shldl %cl, %edi, %esi -; X32-SSE2-NEXT: shldl $27, %ebp, %ebx -; X32-SSE2-NEXT: shll $27, %ebp ; X32-SSE2-NEXT: movl %eax, %ecx -; X32-SSE2-NEXT: shrdl %cl, %ebx, %ebp -; X32-SSE2-NEXT: shrl %cl, %ebx -; X32-SSE2-NEXT: xorl %ecx, %ecx -; X32-SSE2-NEXT: testb $32, %al -; X32-SSE2-NEXT: cmovnel %ebx, %ebp -; X32-SSE2-NEXT: cmovnel %ecx, %ebx -; X32-SSE2-NEXT: xorl %eax, %eax -; X32-SSE2-NEXT: movl %edx, %ecx -; X32-SSE2-NEXT: shll %cl, %edi -; X32-SSE2-NEXT: testb $32, %dl -; X32-SSE2-NEXT: cmovnel %edi, %esi -; X32-SSE2-NEXT: cmovnel %eax, %edi -; X32-SSE2-NEXT: orl %ebp, %edi -; X32-SSE2-NEXT: orl %ebx, %esi -; X32-SSE2-NEXT: movl %edi, %eax +; X32-SSE2-NEXT: addl $27, %ecx +; X32-SSE2-NEXT: testb $32, %cl +; X32-SSE2-NEXT: je .LBB8_1 +; X32-SSE2-NEXT: # %bb.2: +; X32-SSE2-NEXT: movl %edi, %edx +; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-SSE2-NEXT: jmp .LBB8_3 +; X32-SSE2-NEXT: .LBB8_1: +; X32-SSE2-NEXT: shll $27, %ebx ; X32-SSE2-NEXT: movl %esi, %edx +; X32-SSE2-NEXT: movl %ebx, %esi +; X32-SSE2-NEXT: .LBB8_3: +; X32-SSE2-NEXT: shrdl %cl, %edx, %esi +; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-SSE2-NEXT: shrdl %cl, %edi, %edx +; X32-SSE2-NEXT: movl %esi, %eax ; X32-SSE2-NEXT: popl %esi ; X32-SSE2-NEXT: popl %edi ; X32-SSE2-NEXT: popl %ebx -; X32-SSE2-NEXT: popl %ebp ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: fshr_i37: @@ -816,9 +795,9 @@ define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) nounwind { ; X32-SSE2-LABEL: fshr_i64_const_overshift: ; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-SSE2-NEXT: shrdl $9, %ecx, %eax ; X32-SSE2-NEXT: shldl $23, %ecx, %edx ; X32-SSE2-NEXT: retl