diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -3661,9 +3661,9 @@ // Compute the overflow. // - // LHSSign -> LHS >= 0 - // RHSSign -> RHS >= 0 - // SumSign -> Sum >= 0 + // LHSSign -> LHS < 0 + // RHSSign -> RHS < 0 + // SumSign -> Sum < 0 // // Add: // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) @@ -3673,13 +3673,13 @@ EVT OType = Node->getValueType(1); SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); - SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); - SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETLT); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETLT); SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, Node->getOpcode() == ISD::SADDO ? ISD::SETEQ : ISD::SETNE); - SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); + SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETLT); SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); Ovf = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -367,39 +367,33 @@ ; CHECK-LABEL: v2i128: ; CHECK: // %bb.0: ; CHECK-NEXT: cmp x7, #0 // =0 -; CHECK-NEXT: cset w9, ge -; CHECK-NEXT: csinc w9, w9, wzr, ne +; CHECK-NEXT: cset w9, lt ; CHECK-NEXT: cmp x3, #0 // =0 -; CHECK-NEXT: cset w10, ge -; CHECK-NEXT: csinc w10, w10, wzr, ne +; CHECK-NEXT: cset w10, lt ; CHECK-NEXT: cmp w10, w9 ; CHECK-NEXT: cset w9, eq ; CHECK-NEXT: adds x11, x2, x6 ; CHECK-NEXT: adcs x12, x3, x7 ; CHECK-NEXT: cmp x12, #0 // =0 -; CHECK-NEXT: cset w13, ge ; CHECK-NEXT: mov x8, #9223372036854775807 -; CHECK-NEXT: csinc w13, w13, wzr, ne +; CHECK-NEXT: cset w15, lt ; CHECK-NEXT: cinv x14, x8, ge -; CHECK-NEXT: cmp w10, w13 -; CHECK-NEXT: cset w13, ne -; CHECK-NEXT: asr x10, x12, #63 -; CHECK-NEXT: tst w9, w13 +; CHECK-NEXT: cmp w10, w15 +; CHECK-NEXT: cset w10, ne +; CHECK-NEXT: asr x13, x12, #63 +; CHECK-NEXT: tst w9, w10 +; CHECK-NEXT: csel x2, x13, x11, ne ; CHECK-NEXT: csel x3, x14, x12, ne -; CHECK-NEXT: csel x2, x10, x11, ne ; CHECK-NEXT: cmp x5, #0 // =0 -; CHECK-NEXT: cset w9, ge -; CHECK-NEXT: csinc w9, w9, wzr, ne +; CHECK-NEXT: cset w9, lt ; CHECK-NEXT: cmp x1, #0 // =0 -; CHECK-NEXT: cset w10, ge -; CHECK-NEXT: csinc w10, w10, wzr, ne +; CHECK-NEXT: cset w10, lt ; CHECK-NEXT: cmp w10, w9 ; CHECK-NEXT: cset w9, eq ; CHECK-NEXT: adds x11, x0, x4 ; CHECK-NEXT: adcs x12, x1, x5 ; CHECK-NEXT: cmp x12, #0 // =0 -; CHECK-NEXT: cset w13, ge -; CHECK-NEXT: csinc w13, w13, wzr, ne +; CHECK-NEXT: cset w13, lt ; CHECK-NEXT: cinv x8, x8, ge ; CHECK-NEXT: cmp w10, w13 ; CHECK-NEXT: cset w10, ne diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -370,39 +370,33 @@ ; CHECK-LABEL: v2i128: ; CHECK: // %bb.0: ; CHECK-NEXT: cmp x7, #0 // =0 -; CHECK-NEXT: cset w9, ge -; CHECK-NEXT: csinc w9, w9, wzr, ne +; CHECK-NEXT: cset w9, lt ; CHECK-NEXT: cmp x3, #0 // =0 -; CHECK-NEXT: cset w10, ge -; CHECK-NEXT: csinc w10, w10, wzr, ne +; CHECK-NEXT: cset w10, lt ; CHECK-NEXT: cmp w10, w9 ; CHECK-NEXT: cset w9, ne ; CHECK-NEXT: subs x11, x2, x6 ; CHECK-NEXT: sbcs x12, x3, x7 ; CHECK-NEXT: cmp x12, #0 // =0 -; CHECK-NEXT: cset w13, ge ; CHECK-NEXT: mov x8, #9223372036854775807 -; CHECK-NEXT: csinc w13, w13, wzr, ne +; CHECK-NEXT: cset w15, lt ; CHECK-NEXT: cinv x14, x8, ge -; CHECK-NEXT: cmp w10, w13 -; CHECK-NEXT: cset w13, ne -; CHECK-NEXT: asr x10, x12, #63 -; CHECK-NEXT: tst w9, w13 +; CHECK-NEXT: cmp w10, w15 +; CHECK-NEXT: cset w10, ne +; CHECK-NEXT: asr x13, x12, #63 +; CHECK-NEXT: tst w9, w10 +; CHECK-NEXT: csel x2, x13, x11, ne ; CHECK-NEXT: csel x3, x14, x12, ne -; CHECK-NEXT: csel x2, x10, x11, ne ; CHECK-NEXT: cmp x5, #0 // =0 -; CHECK-NEXT: cset w9, ge -; CHECK-NEXT: csinc w9, w9, wzr, ne +; CHECK-NEXT: cset w9, lt ; CHECK-NEXT: cmp x1, #0 // =0 -; CHECK-NEXT: cset w10, ge -; CHECK-NEXT: csinc w10, w10, wzr, ne +; CHECK-NEXT: cset w10, lt ; CHECK-NEXT: cmp w10, w9 ; CHECK-NEXT: cset w9, ne ; CHECK-NEXT: subs x11, x0, x4 ; CHECK-NEXT: sbcs x12, x1, x5 ; CHECK-NEXT: cmp x12, #0 // =0 -; CHECK-NEXT: cset w13, ge -; CHECK-NEXT: csinc w13, w13, wzr, ne +; CHECK-NEXT: cset w13, lt ; CHECK-NEXT: cinv x8, x8, ge ; CHECK-NEXT: cmp w10, w13 ; CHECK-NEXT: cset w10, ne diff --git a/llvm/test/CodeGen/ARM/sadd_sat.ll b/llvm/test/CodeGen/ARM/sadd_sat.ll --- a/llvm/test/CodeGen/ARM/sadd_sat.ll +++ b/llvm/test/CodeGen/ARM/sadd_sat.ll @@ -104,13 +104,13 @@ ; CHECK-T1-NEXT: movs r0, #0 ; CHECK-T1-NEXT: cmp r3, #0 ; CHECK-T1-NEXT: mov r5, r4 -; CHECK-T1-NEXT: bge .LBB1_2 +; CHECK-T1-NEXT: bmi .LBB1_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: mov r5, r0 ; CHECK-T1-NEXT: .LBB1_2: ; CHECK-T1-NEXT: cmp r1, #0 ; CHECK-T1-NEXT: mov r7, r4 -; CHECK-T1-NEXT: bge .LBB1_4 +; CHECK-T1-NEXT: bmi .LBB1_4 ; CHECK-T1-NEXT: @ %bb.3: ; CHECK-T1-NEXT: mov r7, r0 ; CHECK-T1-NEXT: .LBB1_4: @@ -120,9 +120,8 @@ ; CHECK-T1-NEXT: ldr r6, [sp] @ 4-byte Reload ; CHECK-T1-NEXT: adds r6, r2, r6 ; CHECK-T1-NEXT: adcs r1, r3 -; CHECK-T1-NEXT: cmp r1, #0 ; CHECK-T1-NEXT: mov r2, r4 -; CHECK-T1-NEXT: bge .LBB1_6 +; CHECK-T1-NEXT: bmi .LBB1_6 ; CHECK-T1-NEXT: @ %bb.5: ; CHECK-T1-NEXT: mov r2, r0 ; CHECK-T1-NEXT: .LBB1_6: @@ -161,23 +160,22 @@ ; CHECK-T2: @ %bb.0: ; CHECK-T2-NEXT: .save {r7, lr} ; CHECK-T2-NEXT: push {r7, lr} -; CHECK-T2-NEXT: cmp.w r1, #-1 +; CHECK-T2-NEXT: cmp r1, #0 ; CHECK-T2-NEXT: mov.w lr, #0 -; CHECK-T2-NEXT: it gt -; CHECK-T2-NEXT: movgt.w lr, #1 +; CHECK-T2-NEXT: it mi +; CHECK-T2-NEXT: movmi.w lr, #1 ; CHECK-T2-NEXT: adds r0, r0, r2 -; CHECK-T2-NEXT: adc.w r2, r1, r3 -; CHECK-T2-NEXT: movs r1, #0 -; CHECK-T2-NEXT: cmp.w r2, #-1 -; CHECK-T2-NEXT: it gt -; CHECK-T2-NEXT: movgt r1, #1 -; CHECK-T2-NEXT: subs.w r1, lr, r1 +; CHECK-T2-NEXT: adcs.w r2, r1, r3 +; CHECK-T2-NEXT: mov.w r1, #0 +; CHECK-T2-NEXT: it mi +; CHECK-T2-NEXT: movmi r1, #1 ; CHECK-T2-NEXT: mov.w r12, #0 +; CHECK-T2-NEXT: subs.w r1, lr, r1 ; CHECK-T2-NEXT: it ne ; CHECK-T2-NEXT: movne r1, #1 -; CHECK-T2-NEXT: cmp.w r3, #-1 -; CHECK-T2-NEXT: it gt -; CHECK-T2-NEXT: movgt.w r12, #1 +; CHECK-T2-NEXT: cmp r3, #0 +; CHECK-T2-NEXT: it mi +; CHECK-T2-NEXT: movmi.w r12, #1 ; CHECK-T2-NEXT: sub.w r3, lr, r12 ; CHECK-T2-NEXT: clz r3, r3 ; CHECK-T2-NEXT: lsrs r3, r3, #5 @@ -197,19 +195,18 @@ ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: .save {r11, lr} ; CHECK-ARM-NEXT: push {r11, lr} -; CHECK-ARM-NEXT: cmn r1, #1 +; CHECK-ARM-NEXT: cmp r1, #0 ; CHECK-ARM-NEXT: mov lr, #0 -; CHECK-ARM-NEXT: movgt lr, #1 +; CHECK-ARM-NEXT: movmi lr, #1 ; CHECK-ARM-NEXT: adds r0, r0, r2 -; CHECK-ARM-NEXT: adc r2, r1, r3 +; CHECK-ARM-NEXT: adcs r2, r1, r3 ; CHECK-ARM-NEXT: mov r1, #0 -; CHECK-ARM-NEXT: cmn r2, #1 +; CHECK-ARM-NEXT: movmi r1, #1 ; CHECK-ARM-NEXT: mov r12, #0 -; CHECK-ARM-NEXT: movgt r1, #1 ; CHECK-ARM-NEXT: subs r1, lr, r1 ; CHECK-ARM-NEXT: movne r1, #1 -; CHECK-ARM-NEXT: cmn r3, #1 -; CHECK-ARM-NEXT: movgt r12, #1 +; CHECK-ARM-NEXT: cmp r3, #0 +; CHECK-ARM-NEXT: movmi r12, #1 ; CHECK-ARM-NEXT: sub r3, lr, r12 ; CHECK-ARM-NEXT: clz r3, r3 ; CHECK-ARM-NEXT: lsr r3, r3, #5 diff --git a/llvm/test/CodeGen/ARM/sadd_sat_plus.ll b/llvm/test/CodeGen/ARM/sadd_sat_plus.ll --- a/llvm/test/CodeGen/ARM/sadd_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/sadd_sat_plus.ll @@ -87,13 +87,13 @@ ; CHECK-T1-NEXT: movs r4, #0 ; CHECK-T1-NEXT: cmp r5, #0 ; CHECK-T1-NEXT: mov r3, r2 -; CHECK-T1-NEXT: bge .LBB1_2 +; CHECK-T1-NEXT: bmi .LBB1_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: mov r3, r4 ; CHECK-T1-NEXT: .LBB1_2: ; CHECK-T1-NEXT: cmp r1, #0 ; CHECK-T1-NEXT: mov r6, r2 -; CHECK-T1-NEXT: bge .LBB1_4 +; CHECK-T1-NEXT: bmi .LBB1_4 ; CHECK-T1-NEXT: @ %bb.3: ; CHECK-T1-NEXT: mov r6, r4 ; CHECK-T1-NEXT: .LBB1_4: @@ -103,9 +103,8 @@ ; CHECK-T1-NEXT: ldr r7, [sp, #24] ; CHECK-T1-NEXT: adds r0, r0, r7 ; CHECK-T1-NEXT: adcs r1, r5 -; CHECK-T1-NEXT: cmp r1, #0 ; CHECK-T1-NEXT: mov r5, r2 -; CHECK-T1-NEXT: bge .LBB1_6 +; CHECK-T1-NEXT: bmi .LBB1_6 ; CHECK-T1-NEXT: @ %bb.5: ; CHECK-T1-NEXT: mov r5, r4 ; CHECK-T1-NEXT: .LBB1_6: @@ -144,23 +143,22 @@ ; CHECK-T2-NEXT: .save {r7, lr} ; CHECK-T2-NEXT: push {r7, lr} ; CHECK-T2-NEXT: ldrd r2, r12, [sp, #8] -; CHECK-T2-NEXT: cmp.w r1, #-1 +; CHECK-T2-NEXT: cmp r1, #0 ; CHECK-T2-NEXT: mov.w r3, #0 ; CHECK-T2-NEXT: mov.w lr, #0 -; CHECK-T2-NEXT: it gt -; CHECK-T2-NEXT: movgt r3, #1 +; CHECK-T2-NEXT: it mi +; CHECK-T2-NEXT: movmi r3, #1 ; CHECK-T2-NEXT: adds r0, r0, r2 -; CHECK-T2-NEXT: adc.w r2, r1, r12 -; CHECK-T2-NEXT: movs r1, #0 -; CHECK-T2-NEXT: cmp.w r2, #-1 -; CHECK-T2-NEXT: it gt -; CHECK-T2-NEXT: movgt r1, #1 +; CHECK-T2-NEXT: adcs.w r2, r1, r12 +; CHECK-T2-NEXT: mov.w r1, #0 +; CHECK-T2-NEXT: it mi +; CHECK-T2-NEXT: movmi r1, #1 ; CHECK-T2-NEXT: subs r1, r3, r1 ; CHECK-T2-NEXT: it ne ; CHECK-T2-NEXT: movne r1, #1 -; CHECK-T2-NEXT: cmp.w r12, #-1 -; CHECK-T2-NEXT: it gt -; CHECK-T2-NEXT: movgt.w lr, #1 +; CHECK-T2-NEXT: cmp.w r12, #0 +; CHECK-T2-NEXT: it mi +; CHECK-T2-NEXT: movmi.w lr, #1 ; CHECK-T2-NEXT: sub.w r3, r3, lr ; CHECK-T2-NEXT: clz r3, r3 ; CHECK-T2-NEXT: lsrs r3, r3, #5 @@ -186,15 +184,15 @@ ; CHECK-ARM-NEXT: adds r0, r0, r2 ; CHECK-ARM-NEXT: mov r2, #0 ; CHECK-ARM-NEXT: adc lr, r1, r12 -; CHECK-ARM-NEXT: cmn r1, #1 +; CHECK-ARM-NEXT: cmp r1, #0 ; CHECK-ARM-NEXT: mov r1, #0 -; CHECK-ARM-NEXT: movwgt r1, #1 -; CHECK-ARM-NEXT: cmn lr, #1 -; CHECK-ARM-NEXT: movwgt r2, #1 +; CHECK-ARM-NEXT: movwmi r1, #1 +; CHECK-ARM-NEXT: cmp lr, #0 +; CHECK-ARM-NEXT: movwmi r2, #1 ; CHECK-ARM-NEXT: subs r2, r1, r2 ; CHECK-ARM-NEXT: movwne r2, #1 -; CHECK-ARM-NEXT: cmn r12, #1 -; CHECK-ARM-NEXT: movwgt r3, #1 +; CHECK-ARM-NEXT: cmp r12, #0 +; CHECK-ARM-NEXT: movwmi r3, #1 ; CHECK-ARM-NEXT: sub r1, r1, r3 ; CHECK-ARM-NEXT: clz r1, r1 ; CHECK-ARM-NEXT: lsr r1, r1, #5 diff --git a/llvm/test/CodeGen/ARM/ssub_sat.ll b/llvm/test/CodeGen/ARM/ssub_sat.ll --- a/llvm/test/CodeGen/ARM/ssub_sat.ll +++ b/llvm/test/CodeGen/ARM/ssub_sat.ll @@ -106,13 +106,13 @@ ; CHECK-T1-NEXT: movs r0, #0 ; CHECK-T1-NEXT: cmp r3, #0 ; CHECK-T1-NEXT: mov r5, r4 -; CHECK-T1-NEXT: bge .LBB1_2 +; CHECK-T1-NEXT: bmi .LBB1_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: mov r5, r0 ; CHECK-T1-NEXT: .LBB1_2: ; CHECK-T1-NEXT: cmp r1, #0 ; CHECK-T1-NEXT: mov r7, r4 -; CHECK-T1-NEXT: bge .LBB1_4 +; CHECK-T1-NEXT: bmi .LBB1_4 ; CHECK-T1-NEXT: @ %bb.3: ; CHECK-T1-NEXT: mov r7, r0 ; CHECK-T1-NEXT: .LBB1_4: @@ -122,9 +122,8 @@ ; CHECK-T1-NEXT: ldr r6, [sp] @ 4-byte Reload ; CHECK-T1-NEXT: subs r6, r2, r6 ; CHECK-T1-NEXT: sbcs r1, r3 -; CHECK-T1-NEXT: cmp r1, #0 ; CHECK-T1-NEXT: mov r2, r4 -; CHECK-T1-NEXT: bge .LBB1_6 +; CHECK-T1-NEXT: bmi .LBB1_6 ; CHECK-T1-NEXT: @ %bb.5: ; CHECK-T1-NEXT: mov r2, r0 ; CHECK-T1-NEXT: .LBB1_6: @@ -163,23 +162,22 @@ ; CHECK-T2: @ %bb.0: ; CHECK-T2-NEXT: .save {r4, lr} ; CHECK-T2-NEXT: push {r4, lr} -; CHECK-T2-NEXT: cmp.w r3, #-1 +; CHECK-T2-NEXT: cmp r3, #0 ; CHECK-T2-NEXT: mov.w lr, #0 -; CHECK-T2-NEXT: it gt -; CHECK-T2-NEXT: movgt.w lr, #1 -; CHECK-T2-NEXT: cmp.w r1, #-1 +; CHECK-T2-NEXT: it mi +; CHECK-T2-NEXT: movmi.w lr, #1 +; CHECK-T2-NEXT: cmp r1, #0 ; CHECK-T2-NEXT: mov.w r4, #0 ; CHECK-T2-NEXT: mov.w r12, #0 -; CHECK-T2-NEXT: it gt -; CHECK-T2-NEXT: movgt r4, #1 +; CHECK-T2-NEXT: it mi +; CHECK-T2-NEXT: movmi r4, #1 ; CHECK-T2-NEXT: subs.w lr, r4, lr ; CHECK-T2-NEXT: it ne ; CHECK-T2-NEXT: movne.w lr, #1 ; CHECK-T2-NEXT: subs r0, r0, r2 -; CHECK-T2-NEXT: sbc.w r2, r1, r3 -; CHECK-T2-NEXT: cmp.w r2, #-1 -; CHECK-T2-NEXT: it gt -; CHECK-T2-NEXT: movgt.w r12, #1 +; CHECK-T2-NEXT: sbcs.w r2, r1, r3 +; CHECK-T2-NEXT: it mi +; CHECK-T2-NEXT: movmi.w r12, #1 ; CHECK-T2-NEXT: subs.w r1, r4, r12 ; CHECK-T2-NEXT: it ne ; CHECK-T2-NEXT: movne r1, #1 @@ -199,19 +197,18 @@ ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: .save {r4, lr} ; CHECK-ARM-NEXT: push {r4, lr} -; CHECK-ARM-NEXT: cmn r3, #1 +; CHECK-ARM-NEXT: cmp r3, #0 ; CHECK-ARM-NEXT: mov lr, #0 -; CHECK-ARM-NEXT: movgt lr, #1 -; CHECK-ARM-NEXT: cmn r1, #1 +; CHECK-ARM-NEXT: movmi lr, #1 +; CHECK-ARM-NEXT: cmp r1, #0 ; CHECK-ARM-NEXT: mov r4, #0 ; CHECK-ARM-NEXT: mov r12, #0 -; CHECK-ARM-NEXT: movgt r4, #1 +; CHECK-ARM-NEXT: movmi r4, #1 ; CHECK-ARM-NEXT: subs lr, r4, lr ; CHECK-ARM-NEXT: movne lr, #1 ; CHECK-ARM-NEXT: subs r0, r0, r2 -; CHECK-ARM-NEXT: sbc r2, r1, r3 -; CHECK-ARM-NEXT: cmn r2, #1 -; CHECK-ARM-NEXT: movgt r12, #1 +; CHECK-ARM-NEXT: sbcs r2, r1, r3 +; CHECK-ARM-NEXT: movmi r12, #1 ; CHECK-ARM-NEXT: subs r1, r4, r12 ; CHECK-ARM-NEXT: movne r1, #1 ; CHECK-ARM-NEXT: ands r3, lr, r1 diff --git a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll --- a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll @@ -92,13 +92,13 @@ ; CHECK-T1-NEXT: movs r4, #0 ; CHECK-T1-NEXT: cmp r5, #0 ; CHECK-T1-NEXT: mov r3, r2 -; CHECK-T1-NEXT: bge .LBB1_2 +; CHECK-T1-NEXT: bmi .LBB1_2 ; CHECK-T1-NEXT: @ %bb.1: ; CHECK-T1-NEXT: mov r3, r4 ; CHECK-T1-NEXT: .LBB1_2: ; CHECK-T1-NEXT: cmp r1, #0 ; CHECK-T1-NEXT: mov r6, r2 -; CHECK-T1-NEXT: bge .LBB1_4 +; CHECK-T1-NEXT: bmi .LBB1_4 ; CHECK-T1-NEXT: @ %bb.3: ; CHECK-T1-NEXT: mov r6, r4 ; CHECK-T1-NEXT: .LBB1_4: @@ -108,9 +108,8 @@ ; CHECK-T1-NEXT: ldr r7, [sp, #24] ; CHECK-T1-NEXT: subs r0, r0, r7 ; CHECK-T1-NEXT: sbcs r1, r5 -; CHECK-T1-NEXT: cmp r1, #0 ; CHECK-T1-NEXT: mov r5, r2 -; CHECK-T1-NEXT: bge .LBB1_6 +; CHECK-T1-NEXT: bmi .LBB1_6 ; CHECK-T1-NEXT: @ %bb.5: ; CHECK-T1-NEXT: mov r5, r4 ; CHECK-T1-NEXT: .LBB1_6: @@ -152,21 +151,20 @@ ; CHECK-T2-NEXT: movs r2, #0 ; CHECK-T2-NEXT: movs r3, #0 ; CHECK-T2-NEXT: ldr r4, [sp, #8] -; CHECK-T2-NEXT: cmp.w r12, #-1 -; CHECK-T2-NEXT: it gt -; CHECK-T2-NEXT: movgt r2, #1 -; CHECK-T2-NEXT: cmp.w r1, #-1 -; CHECK-T2-NEXT: it gt -; CHECK-T2-NEXT: movgt r3, #1 +; CHECK-T2-NEXT: cmp.w r12, #0 +; CHECK-T2-NEXT: it mi +; CHECK-T2-NEXT: movmi r2, #1 +; CHECK-T2-NEXT: cmp r1, #0 +; CHECK-T2-NEXT: it mi +; CHECK-T2-NEXT: movmi r3, #1 ; CHECK-T2-NEXT: subs r2, r3, r2 ; CHECK-T2-NEXT: mov.w lr, #0 ; CHECK-T2-NEXT: it ne ; CHECK-T2-NEXT: movne r2, #1 ; CHECK-T2-NEXT: subs r0, r0, r4 -; CHECK-T2-NEXT: sbc.w r4, r1, r12 -; CHECK-T2-NEXT: cmp.w r4, #-1 -; CHECK-T2-NEXT: it gt -; CHECK-T2-NEXT: movgt.w lr, #1 +; CHECK-T2-NEXT: sbcs.w r4, r1, r12 +; CHECK-T2-NEXT: it mi +; CHECK-T2-NEXT: movmi.w lr, #1 ; CHECK-T2-NEXT: subs.w r1, r3, lr ; CHECK-T2-NEXT: it ne ; CHECK-T2-NEXT: movne r1, #1 @@ -184,33 +182,32 @@ ; ; CHECK-ARM-LABEL: func64: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: .save {r4, r5, r11, lr} -; CHECK-ARM-NEXT: push {r4, r5, r11, lr} -; CHECK-ARM-NEXT: ldr lr, [sp, #20] -; CHECK-ARM-NEXT: cmn r1, #1 +; CHECK-ARM-NEXT: .save {r4, lr} +; CHECK-ARM-NEXT: push {r4, lr} +; CHECK-ARM-NEXT: ldr lr, [sp, #12] +; CHECK-ARM-NEXT: cmp r1, #0 ; CHECK-ARM-NEXT: mov r3, #0 ; CHECK-ARM-NEXT: mov r4, #0 -; CHECK-ARM-NEXT: movwgt r3, #1 -; CHECK-ARM-NEXT: cmn lr, #1 -; CHECK-ARM-NEXT: movwgt r4, #1 -; CHECK-ARM-NEXT: ldr r12, [sp, #16] +; CHECK-ARM-NEXT: movwmi r3, #1 +; CHECK-ARM-NEXT: cmp lr, #0 +; CHECK-ARM-NEXT: movwmi r4, #1 +; CHECK-ARM-NEXT: ldr r12, [sp, #8] ; CHECK-ARM-NEXT: subs r4, r3, r4 -; CHECK-ARM-NEXT: mov r5, #0 +; CHECK-ARM-NEXT: mov r2, #0 ; CHECK-ARM-NEXT: movwne r4, #1 ; CHECK-ARM-NEXT: subs r0, r0, r12 -; CHECK-ARM-NEXT: sbc r2, r1, lr -; CHECK-ARM-NEXT: cmn r2, #1 -; CHECK-ARM-NEXT: movwgt r5, #1 -; CHECK-ARM-NEXT: subs r1, r3, r5 +; CHECK-ARM-NEXT: sbcs r12, r1, lr +; CHECK-ARM-NEXT: movwmi r2, #1 +; CHECK-ARM-NEXT: subs r1, r3, r2 ; CHECK-ARM-NEXT: movwne r1, #1 -; CHECK-ARM-NEXT: ands r3, r4, r1 -; CHECK-ARM-NEXT: asrne r0, r2, #31 +; CHECK-ARM-NEXT: ands r2, r4, r1 +; CHECK-ARM-NEXT: asrne r0, r12, #31 ; CHECK-ARM-NEXT: mov r1, #-2147483648 -; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: cmp r12, #0 ; CHECK-ARM-NEXT: mvnmi r1, #-2147483648 -; CHECK-ARM-NEXT: cmp r3, #0 -; CHECK-ARM-NEXT: moveq r1, r2 -; CHECK-ARM-NEXT: pop {r4, r5, r11, pc} +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: moveq r1, r12 +; CHECK-ARM-NEXT: pop {r4, pc} %a = mul i64 %y, %z %tmp = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %z) ret i64 %tmp diff --git a/llvm/test/CodeGen/RISCV/sadd_sat.ll b/llvm/test/CodeGen/RISCV/sadd_sat.ll --- a/llvm/test/CodeGen/RISCV/sadd_sat.ll +++ b/llvm/test/CodeGen/RISCV/sadd_sat.ll @@ -79,49 +79,31 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: func2: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a4, a0 -; RV32I-NEXT: add a5, a1, a3 +; RV32I-NEXT: mv a4, a1 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: add a5, a4, a3 ; RV32I-NEXT: add a0, a0, a2 -; RV32I-NEXT: sltu a2, a0, a4 -; RV32I-NEXT: add a2, a5, a2 -; RV32I-NEXT: addi a6, zero, -1 -; RV32I-NEXT: addi a7, zero, 1 -; RV32I-NEXT: addi a4, zero, 1 -; RV32I-NEXT: beqz a2, .LBB1_2 +; RV32I-NEXT: sltu a1, a0, a1 +; RV32I-NEXT: add a2, a5, a1 +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: bgez a2, .LBB1_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt a4, a6, a2 +; RV32I-NEXT: addi a1, a1, -1 ; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: addi a5, zero, 1 -; RV32I-NEXT: beqz a1, .LBB1_4 -; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: slt a5, a6, a1 -; RV32I-NEXT: .LBB1_4: -; RV32I-NEXT: xor a1, a5, a4 -; RV32I-NEXT: snez a1, a1 -; RV32I-NEXT: beqz a3, .LBB1_6 -; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: slt a7, a6, a3 -; RV32I-NEXT: .LBB1_6: -; RV32I-NEXT: xor a3, a5, a7 +; RV32I-NEXT: slti a5, a2, 0 +; RV32I-NEXT: slti a4, a4, 0 +; RV32I-NEXT: xor a5, a4, a5 +; RV32I-NEXT: snez a5, a5 +; RV32I-NEXT: slti a3, a3, 0 +; RV32I-NEXT: xor a3, a4, a3 ; RV32I-NEXT: seqz a3, a3 -; RV32I-NEXT: and a3, a3, a1 -; RV32I-NEXT: bnez a3, .LBB1_10 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: bltz a2, .LBB1_11 -; RV32I-NEXT: .LBB1_8: -; RV32I-NEXT: beqz a3, .LBB1_12 -; RV32I-NEXT: .LBB1_9: +; RV32I-NEXT: and a3, a3, a5 +; RV32I-NEXT: bnez a3, .LBB1_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a1, a2 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB1_10: +; RV32I-NEXT: .LBB1_4: ; RV32I-NEXT: srai a0, a2, 31 -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: bgez a2, .LBB1_8 -; RV32I-NEXT: .LBB1_11: -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: bnez a3, .LBB1_9 -; RV32I-NEXT: .LBB1_12: -; RV32I-NEXT: mv a1, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: func2: @@ -145,49 +127,31 @@ ; ; RV32IZbb-LABEL: func2: ; RV32IZbb: # %bb.0: -; RV32IZbb-NEXT: mv a4, a0 -; RV32IZbb-NEXT: add a5, a1, a3 +; RV32IZbb-NEXT: mv a4, a1 +; RV32IZbb-NEXT: mv a1, a0 +; RV32IZbb-NEXT: add a5, a4, a3 ; RV32IZbb-NEXT: add a0, a0, a2 -; RV32IZbb-NEXT: sltu a2, a0, a4 -; RV32IZbb-NEXT: add a2, a5, a2 -; RV32IZbb-NEXT: addi a6, zero, -1 -; RV32IZbb-NEXT: addi a7, zero, 1 -; RV32IZbb-NEXT: addi a4, zero, 1 -; RV32IZbb-NEXT: beqz a2, .LBB1_2 +; RV32IZbb-NEXT: sltu a1, a0, a1 +; RV32IZbb-NEXT: add a2, a5, a1 +; RV32IZbb-NEXT: lui a1, 524288 +; RV32IZbb-NEXT: bgez a2, .LBB1_2 ; RV32IZbb-NEXT: # %bb.1: -; RV32IZbb-NEXT: slt a4, a6, a2 +; RV32IZbb-NEXT: addi a1, a1, -1 ; RV32IZbb-NEXT: .LBB1_2: -; RV32IZbb-NEXT: addi a5, zero, 1 -; RV32IZbb-NEXT: beqz a1, .LBB1_4 -; RV32IZbb-NEXT: # %bb.3: -; RV32IZbb-NEXT: slt a5, a6, a1 -; RV32IZbb-NEXT: .LBB1_4: -; RV32IZbb-NEXT: xor a1, a5, a4 -; RV32IZbb-NEXT: snez a1, a1 -; RV32IZbb-NEXT: beqz a3, .LBB1_6 -; RV32IZbb-NEXT: # %bb.5: -; RV32IZbb-NEXT: slt a7, a6, a3 -; RV32IZbb-NEXT: .LBB1_6: -; RV32IZbb-NEXT: xor a3, a5, a7 +; RV32IZbb-NEXT: slti a5, a2, 0 +; RV32IZbb-NEXT: slti a4, a4, 0 +; RV32IZbb-NEXT: xor a5, a4, a5 +; RV32IZbb-NEXT: snez a5, a5 +; RV32IZbb-NEXT: slti a3, a3, 0 +; RV32IZbb-NEXT: xor a3, a4, a3 ; RV32IZbb-NEXT: seqz a3, a3 -; RV32IZbb-NEXT: and a3, a3, a1 -; RV32IZbb-NEXT: bnez a3, .LBB1_10 -; RV32IZbb-NEXT: # %bb.7: -; RV32IZbb-NEXT: lui a1, 524288 -; RV32IZbb-NEXT: bltz a2, .LBB1_11 -; RV32IZbb-NEXT: .LBB1_8: -; RV32IZbb-NEXT: beqz a3, .LBB1_12 -; RV32IZbb-NEXT: .LBB1_9: +; RV32IZbb-NEXT: and a3, a3, a5 +; RV32IZbb-NEXT: bnez a3, .LBB1_4 +; RV32IZbb-NEXT: # %bb.3: +; RV32IZbb-NEXT: mv a1, a2 ; RV32IZbb-NEXT: ret -; RV32IZbb-NEXT: .LBB1_10: +; RV32IZbb-NEXT: .LBB1_4: ; RV32IZbb-NEXT: srai a0, a2, 31 -; RV32IZbb-NEXT: lui a1, 524288 -; RV32IZbb-NEXT: bgez a2, .LBB1_8 -; RV32IZbb-NEXT: .LBB1_11: -; RV32IZbb-NEXT: addi a1, a1, -1 -; RV32IZbb-NEXT: bnez a3, .LBB1_9 -; RV32IZbb-NEXT: .LBB1_12: -; RV32IZbb-NEXT: mv a1, a2 ; RV32IZbb-NEXT: ret ; ; RV64IZbb-LABEL: func2: diff --git a/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll b/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll --- a/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll @@ -88,49 +88,31 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; RV32I-LABEL: func64: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: add a3, a1, a5 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: add a3, a2, a5 ; RV32I-NEXT: add a0, a0, a4 -; RV32I-NEXT: sltu a2, a0, a2 -; RV32I-NEXT: add a2, a3, a2 -; RV32I-NEXT: addi a6, zero, -1 -; RV32I-NEXT: addi a7, zero, 1 -; RV32I-NEXT: addi a3, zero, 1 -; RV32I-NEXT: beqz a2, .LBB1_2 +; RV32I-NEXT: sltu a1, a0, a1 +; RV32I-NEXT: add a3, a3, a1 +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: bgez a3, .LBB1_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt a3, a6, a2 +; RV32I-NEXT: addi a1, a1, -1 ; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: addi a4, zero, 1 -; RV32I-NEXT: beqz a1, .LBB1_4 +; RV32I-NEXT: slti a4, a3, 0 +; RV32I-NEXT: slti a2, a2, 0 +; RV32I-NEXT: xor a4, a2, a4 +; RV32I-NEXT: snez a4, a4 +; RV32I-NEXT: slti a5, a5, 0 +; RV32I-NEXT: xor a2, a2, a5 +; RV32I-NEXT: seqz a2, a2 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: bnez a2, .LBB1_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: slt a4, a6, a1 -; RV32I-NEXT: .LBB1_4: -; RV32I-NEXT: xor a1, a4, a3 -; RV32I-NEXT: snez a1, a1 -; RV32I-NEXT: beqz a5, .LBB1_6 -; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: slt a7, a6, a5 -; RV32I-NEXT: .LBB1_6: -; RV32I-NEXT: xor a3, a4, a7 -; RV32I-NEXT: seqz a3, a3 -; RV32I-NEXT: and a3, a3, a1 -; RV32I-NEXT: bnez a3, .LBB1_10 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: bltz a2, .LBB1_11 -; RV32I-NEXT: .LBB1_8: -; RV32I-NEXT: beqz a3, .LBB1_12 -; RV32I-NEXT: .LBB1_9: +; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB1_10: -; RV32I-NEXT: srai a0, a2, 31 -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: bgez a2, .LBB1_8 -; RV32I-NEXT: .LBB1_11: -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: bnez a3, .LBB1_9 -; RV32I-NEXT: .LBB1_12: -; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: .LBB1_4: +; RV32I-NEXT: srai a0, a3, 31 ; RV32I-NEXT: ret ; ; RV64I-LABEL: func64: @@ -154,49 +136,31 @@ ; ; RV32IZbb-LABEL: func64: ; RV32IZbb: # %bb.0: -; RV32IZbb-NEXT: mv a2, a0 -; RV32IZbb-NEXT: add a3, a1, a5 +; RV32IZbb-NEXT: mv a2, a1 +; RV32IZbb-NEXT: mv a1, a0 +; RV32IZbb-NEXT: add a3, a2, a5 ; RV32IZbb-NEXT: add a0, a0, a4 -; RV32IZbb-NEXT: sltu a2, a0, a2 -; RV32IZbb-NEXT: add a2, a3, a2 -; RV32IZbb-NEXT: addi a6, zero, -1 -; RV32IZbb-NEXT: addi a7, zero, 1 -; RV32IZbb-NEXT: addi a3, zero, 1 -; RV32IZbb-NEXT: beqz a2, .LBB1_2 +; RV32IZbb-NEXT: sltu a1, a0, a1 +; RV32IZbb-NEXT: add a3, a3, a1 +; RV32IZbb-NEXT: lui a1, 524288 +; RV32IZbb-NEXT: bgez a3, .LBB1_2 ; RV32IZbb-NEXT: # %bb.1: -; RV32IZbb-NEXT: slt a3, a6, a2 +; RV32IZbb-NEXT: addi a1, a1, -1 ; RV32IZbb-NEXT: .LBB1_2: -; RV32IZbb-NEXT: addi a4, zero, 1 -; RV32IZbb-NEXT: beqz a1, .LBB1_4 +; RV32IZbb-NEXT: slti a4, a3, 0 +; RV32IZbb-NEXT: slti a2, a2, 0 +; RV32IZbb-NEXT: xor a4, a2, a4 +; RV32IZbb-NEXT: snez a4, a4 +; RV32IZbb-NEXT: slti a5, a5, 0 +; RV32IZbb-NEXT: xor a2, a2, a5 +; RV32IZbb-NEXT: seqz a2, a2 +; RV32IZbb-NEXT: and a2, a2, a4 +; RV32IZbb-NEXT: bnez a2, .LBB1_4 ; RV32IZbb-NEXT: # %bb.3: -; RV32IZbb-NEXT: slt a4, a6, a1 -; RV32IZbb-NEXT: .LBB1_4: -; RV32IZbb-NEXT: xor a1, a4, a3 -; RV32IZbb-NEXT: snez a1, a1 -; RV32IZbb-NEXT: beqz a5, .LBB1_6 -; RV32IZbb-NEXT: # %bb.5: -; RV32IZbb-NEXT: slt a7, a6, a5 -; RV32IZbb-NEXT: .LBB1_6: -; RV32IZbb-NEXT: xor a3, a4, a7 -; RV32IZbb-NEXT: seqz a3, a3 -; RV32IZbb-NEXT: and a3, a3, a1 -; RV32IZbb-NEXT: bnez a3, .LBB1_10 -; RV32IZbb-NEXT: # %bb.7: -; RV32IZbb-NEXT: lui a1, 524288 -; RV32IZbb-NEXT: bltz a2, .LBB1_11 -; RV32IZbb-NEXT: .LBB1_8: -; RV32IZbb-NEXT: beqz a3, .LBB1_12 -; RV32IZbb-NEXT: .LBB1_9: +; RV32IZbb-NEXT: mv a1, a3 ; RV32IZbb-NEXT: ret -; RV32IZbb-NEXT: .LBB1_10: -; RV32IZbb-NEXT: srai a0, a2, 31 -; RV32IZbb-NEXT: lui a1, 524288 -; RV32IZbb-NEXT: bgez a2, .LBB1_8 -; RV32IZbb-NEXT: .LBB1_11: -; RV32IZbb-NEXT: addi a1, a1, -1 -; RV32IZbb-NEXT: bnez a3, .LBB1_9 -; RV32IZbb-NEXT: .LBB1_12: -; RV32IZbb-NEXT: mv a1, a2 +; RV32IZbb-NEXT: .LBB1_4: +; RV32IZbb-NEXT: srai a0, a3, 31 ; RV32IZbb-NEXT: ret ; ; RV64IZbb-LABEL: func64: diff --git a/llvm/test/CodeGen/RISCV/ssub_sat.ll b/llvm/test/CodeGen/RISCV/ssub_sat.ll --- a/llvm/test/CodeGen/RISCV/ssub_sat.ll +++ b/llvm/test/CodeGen/RISCV/ssub_sat.ll @@ -81,49 +81,31 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: func2: ; RV32I: # %bb.0: -; RV32I-NEXT: sltu a4, a0, a2 -; RV32I-NEXT: sub a5, a1, a3 -; RV32I-NEXT: sub t0, a5, a4 -; RV32I-NEXT: addi a6, zero, -1 -; RV32I-NEXT: addi a7, zero, 1 -; RV32I-NEXT: addi a4, zero, 1 -; RV32I-NEXT: beqz t0, .LBB1_2 +; RV32I-NEXT: mv a6, a1 +; RV32I-NEXT: sltu a1, a0, a2 +; RV32I-NEXT: sub a5, a6, a3 +; RV32I-NEXT: sub t0, a5, a1 +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: bgez t0, .LBB1_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt a4, a6, t0 +; RV32I-NEXT: addi a1, a1, -1 ; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: addi a5, zero, 1 -; RV32I-NEXT: beqz a1, .LBB1_4 -; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: slt a5, a6, a1 -; RV32I-NEXT: .LBB1_4: -; RV32I-NEXT: xor a1, a5, a4 -; RV32I-NEXT: snez a1, a1 -; RV32I-NEXT: beqz a3, .LBB1_6 -; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: slt a7, a6, a3 -; RV32I-NEXT: .LBB1_6: -; RV32I-NEXT: xor a3, a5, a7 +; RV32I-NEXT: slti a7, t0, 0 +; RV32I-NEXT: slti a4, a6, 0 +; RV32I-NEXT: xor a5, a4, a7 +; RV32I-NEXT: snez a5, a5 +; RV32I-NEXT: slti a3, a3, 0 +; RV32I-NEXT: xor a3, a4, a3 ; RV32I-NEXT: snez a3, a3 -; RV32I-NEXT: and a3, a3, a1 -; RV32I-NEXT: bnez a3, .LBB1_8 -; RV32I-NEXT: # %bb.7: +; RV32I-NEXT: and a3, a3, a5 +; RV32I-NEXT: bnez a3, .LBB1_4 +; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: sub a0, a0, a2 -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: bltz t0, .LBB1_9 -; RV32I-NEXT: j .LBB1_10 -; RV32I-NEXT: .LBB1_8: -; RV32I-NEXT: srai a0, t0, 31 -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: bgez t0, .LBB1_10 -; RV32I-NEXT: .LBB1_9: -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: .LBB1_10: -; RV32I-NEXT: beqz a3, .LBB1_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB1_12: ; RV32I-NEXT: mv a1, t0 ; RV32I-NEXT: ret +; RV32I-NEXT: .LBB1_4: +; RV32I-NEXT: srai a0, t0, 31 +; RV32I-NEXT: ret ; ; RV64I-LABEL: func2: ; RV64I: # %bb.0: @@ -147,49 +129,31 @@ ; ; RV32IZbb-LABEL: func2: ; RV32IZbb: # %bb.0: -; RV32IZbb-NEXT: sltu a4, a0, a2 -; RV32IZbb-NEXT: sub a5, a1, a3 -; RV32IZbb-NEXT: sub t0, a5, a4 -; RV32IZbb-NEXT: addi a6, zero, -1 -; RV32IZbb-NEXT: addi a7, zero, 1 -; RV32IZbb-NEXT: addi a4, zero, 1 -; RV32IZbb-NEXT: beqz t0, .LBB1_2 +; RV32IZbb-NEXT: mv a6, a1 +; RV32IZbb-NEXT: sltu a1, a0, a2 +; RV32IZbb-NEXT: sub a5, a6, a3 +; RV32IZbb-NEXT: sub t0, a5, a1 +; RV32IZbb-NEXT: lui a1, 524288 +; RV32IZbb-NEXT: bgez t0, .LBB1_2 ; RV32IZbb-NEXT: # %bb.1: -; RV32IZbb-NEXT: slt a4, a6, t0 +; RV32IZbb-NEXT: addi a1, a1, -1 ; RV32IZbb-NEXT: .LBB1_2: -; RV32IZbb-NEXT: addi a5, zero, 1 -; RV32IZbb-NEXT: beqz a1, .LBB1_4 -; RV32IZbb-NEXT: # %bb.3: -; RV32IZbb-NEXT: slt a5, a6, a1 -; RV32IZbb-NEXT: .LBB1_4: -; RV32IZbb-NEXT: xor a1, a5, a4 -; RV32IZbb-NEXT: snez a1, a1 -; RV32IZbb-NEXT: beqz a3, .LBB1_6 -; RV32IZbb-NEXT: # %bb.5: -; RV32IZbb-NEXT: slt a7, a6, a3 -; RV32IZbb-NEXT: .LBB1_6: -; RV32IZbb-NEXT: xor a3, a5, a7 +; RV32IZbb-NEXT: slti a7, t0, 0 +; RV32IZbb-NEXT: slti a4, a6, 0 +; RV32IZbb-NEXT: xor a5, a4, a7 +; RV32IZbb-NEXT: snez a5, a5 +; RV32IZbb-NEXT: slti a3, a3, 0 +; RV32IZbb-NEXT: xor a3, a4, a3 ; RV32IZbb-NEXT: snez a3, a3 -; RV32IZbb-NEXT: and a3, a3, a1 -; RV32IZbb-NEXT: bnez a3, .LBB1_8 -; RV32IZbb-NEXT: # %bb.7: +; RV32IZbb-NEXT: and a3, a3, a5 +; RV32IZbb-NEXT: bnez a3, .LBB1_4 +; RV32IZbb-NEXT: # %bb.3: ; RV32IZbb-NEXT: sub a0, a0, a2 -; RV32IZbb-NEXT: lui a1, 524288 -; RV32IZbb-NEXT: bltz t0, .LBB1_9 -; RV32IZbb-NEXT: j .LBB1_10 -; RV32IZbb-NEXT: .LBB1_8: -; RV32IZbb-NEXT: srai a0, t0, 31 -; RV32IZbb-NEXT: lui a1, 524288 -; RV32IZbb-NEXT: bgez t0, .LBB1_10 -; RV32IZbb-NEXT: .LBB1_9: -; RV32IZbb-NEXT: addi a1, a1, -1 -; RV32IZbb-NEXT: .LBB1_10: -; RV32IZbb-NEXT: beqz a3, .LBB1_12 -; RV32IZbb-NEXT: # %bb.11: -; RV32IZbb-NEXT: ret -; RV32IZbb-NEXT: .LBB1_12: ; RV32IZbb-NEXT: mv a1, t0 ; RV32IZbb-NEXT: ret +; RV32IZbb-NEXT: .LBB1_4: +; RV32IZbb-NEXT: srai a0, t0, 31 +; RV32IZbb-NEXT: ret ; ; RV64IZbb-LABEL: func2: ; RV64IZbb: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll --- a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll @@ -88,49 +88,31 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; RV32I-LABEL: func64: ; RV32I: # %bb.0: -; RV32I-NEXT: sltu a2, a0, a4 -; RV32I-NEXT: sub a3, a1, a5 -; RV32I-NEXT: sub t0, a3, a2 -; RV32I-NEXT: addi a6, zero, -1 -; RV32I-NEXT: addi a7, zero, 1 -; RV32I-NEXT: addi a2, zero, 1 -; RV32I-NEXT: beqz t0, .LBB1_2 +; RV32I-NEXT: mv a6, a1 +; RV32I-NEXT: sltu a1, a0, a4 +; RV32I-NEXT: sub a3, a6, a5 +; RV32I-NEXT: sub t0, a3, a1 +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: bgez t0, .LBB1_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt a2, a6, t0 +; RV32I-NEXT: addi a1, a1, -1 ; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: addi a3, zero, 1 -; RV32I-NEXT: beqz a1, .LBB1_4 -; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: slt a3, a6, a1 -; RV32I-NEXT: .LBB1_4: -; RV32I-NEXT: xor a1, a3, a2 -; RV32I-NEXT: snez a1, a1 -; RV32I-NEXT: beqz a5, .LBB1_6 -; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: slt a7, a6, a5 -; RV32I-NEXT: .LBB1_6: -; RV32I-NEXT: xor a2, a3, a7 +; RV32I-NEXT: slti a7, t0, 0 +; RV32I-NEXT: slti a2, a6, 0 +; RV32I-NEXT: xor a3, a2, a7 +; RV32I-NEXT: snez a3, a3 +; RV32I-NEXT: slti a5, a5, 0 +; RV32I-NEXT: xor a2, a2, a5 ; RV32I-NEXT: snez a2, a2 -; RV32I-NEXT: and a3, a2, a1 -; RV32I-NEXT: bnez a3, .LBB1_8 -; RV32I-NEXT: # %bb.7: +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: bnez a2, .LBB1_4 +; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: sub a0, a0, a4 -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: bltz t0, .LBB1_9 -; RV32I-NEXT: j .LBB1_10 -; RV32I-NEXT: .LBB1_8: -; RV32I-NEXT: srai a0, t0, 31 -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: bgez t0, .LBB1_10 -; RV32I-NEXT: .LBB1_9: -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: .LBB1_10: -; RV32I-NEXT: beqz a3, .LBB1_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB1_12: ; RV32I-NEXT: mv a1, t0 ; RV32I-NEXT: ret +; RV32I-NEXT: .LBB1_4: +; RV32I-NEXT: srai a0, t0, 31 +; RV32I-NEXT: ret ; ; RV64I-LABEL: func64: ; RV64I: # %bb.0: @@ -154,49 +136,31 @@ ; ; RV32IZbb-LABEL: func64: ; RV32IZbb: # %bb.0: -; RV32IZbb-NEXT: sltu a2, a0, a4 -; RV32IZbb-NEXT: sub a3, a1, a5 -; RV32IZbb-NEXT: sub t0, a3, a2 -; RV32IZbb-NEXT: addi a6, zero, -1 -; RV32IZbb-NEXT: addi a7, zero, 1 -; RV32IZbb-NEXT: addi a2, zero, 1 -; RV32IZbb-NEXT: beqz t0, .LBB1_2 +; RV32IZbb-NEXT: mv a6, a1 +; RV32IZbb-NEXT: sltu a1, a0, a4 +; RV32IZbb-NEXT: sub a3, a6, a5 +; RV32IZbb-NEXT: sub t0, a3, a1 +; RV32IZbb-NEXT: lui a1, 524288 +; RV32IZbb-NEXT: bgez t0, .LBB1_2 ; RV32IZbb-NEXT: # %bb.1: -; RV32IZbb-NEXT: slt a2, a6, t0 +; RV32IZbb-NEXT: addi a1, a1, -1 ; RV32IZbb-NEXT: .LBB1_2: -; RV32IZbb-NEXT: addi a3, zero, 1 -; RV32IZbb-NEXT: beqz a1, .LBB1_4 -; RV32IZbb-NEXT: # %bb.3: -; RV32IZbb-NEXT: slt a3, a6, a1 -; RV32IZbb-NEXT: .LBB1_4: -; RV32IZbb-NEXT: xor a1, a3, a2 -; RV32IZbb-NEXT: snez a1, a1 -; RV32IZbb-NEXT: beqz a5, .LBB1_6 -; RV32IZbb-NEXT: # %bb.5: -; RV32IZbb-NEXT: slt a7, a6, a5 -; RV32IZbb-NEXT: .LBB1_6: -; RV32IZbb-NEXT: xor a2, a3, a7 +; RV32IZbb-NEXT: slti a7, t0, 0 +; RV32IZbb-NEXT: slti a2, a6, 0 +; RV32IZbb-NEXT: xor a3, a2, a7 +; RV32IZbb-NEXT: snez a3, a3 +; RV32IZbb-NEXT: slti a5, a5, 0 +; RV32IZbb-NEXT: xor a2, a2, a5 ; RV32IZbb-NEXT: snez a2, a2 -; RV32IZbb-NEXT: and a3, a2, a1 -; RV32IZbb-NEXT: bnez a3, .LBB1_8 -; RV32IZbb-NEXT: # %bb.7: +; RV32IZbb-NEXT: and a2, a2, a3 +; RV32IZbb-NEXT: bnez a2, .LBB1_4 +; RV32IZbb-NEXT: # %bb.3: ; RV32IZbb-NEXT: sub a0, a0, a4 -; RV32IZbb-NEXT: lui a1, 524288 -; RV32IZbb-NEXT: bltz t0, .LBB1_9 -; RV32IZbb-NEXT: j .LBB1_10 -; RV32IZbb-NEXT: .LBB1_8: -; RV32IZbb-NEXT: srai a0, t0, 31 -; RV32IZbb-NEXT: lui a1, 524288 -; RV32IZbb-NEXT: bgez t0, .LBB1_10 -; RV32IZbb-NEXT: .LBB1_9: -; RV32IZbb-NEXT: addi a1, a1, -1 -; RV32IZbb-NEXT: .LBB1_10: -; RV32IZbb-NEXT: beqz a3, .LBB1_12 -; RV32IZbb-NEXT: # %bb.11: -; RV32IZbb-NEXT: ret -; RV32IZbb-NEXT: .LBB1_12: ; RV32IZbb-NEXT: mv a1, t0 ; RV32IZbb-NEXT: ret +; RV32IZbb-NEXT: .LBB1_4: +; RV32IZbb-NEXT: srai a0, t0, 31 +; RV32IZbb-NEXT: ret ; ; RV64IZbb-LABEL: func64: ; RV64IZbb: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll --- a/llvm/test/CodeGen/RISCV/xaluo.ll +++ b/llvm/test/CodeGen/RISCV/xaluo.ll @@ -120,31 +120,18 @@ ; RV32-LABEL: saddo1.i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: add a5, a1, a3 -; RV32-NEXT: add t0, a0, a2 -; RV32-NEXT: sltu a0, t0, a0 +; RV32-NEXT: add a2, a0, a2 +; RV32-NEXT: sltu a0, a2, a0 ; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: addi a6, zero, -1 -; RV32-NEXT: addi a7, zero, 1 -; RV32-NEXT: addi a2, zero, 1 -; RV32-NEXT: beqz a5, .LBB4_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slt a2, a6, a5 -; RV32-NEXT: .LBB4_2: # %entry -; RV32-NEXT: addi a0, zero, 1 -; RV32-NEXT: beqz a1, .LBB4_4 -; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: slt a0, a6, a1 -; RV32-NEXT: .LBB4_4: # %entry -; RV32-NEXT: xor a1, a0, a2 -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: beqz a3, .LBB4_6 -; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: slt a7, a6, a3 -; RV32-NEXT: .LBB4_6: # %entry -; RV32-NEXT: xor a0, a0, a7 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: sw t0, 0(a4) +; RV32-NEXT: slti a0, a5, 0 +; RV32-NEXT: slti a1, a1, 0 +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: slti a3, a3, 0 +; RV32-NEXT: xor a1, a1, a3 +; RV32-NEXT: seqz a1, a1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: sw a2, 0(a4) ; RV32-NEXT: sw a5, 4(a4) ; RV32-NEXT: ret ; @@ -167,26 +154,18 @@ define zeroext i1 @saddo2.i64(i64 %v1, i64* %res) { ; RV32-LABEL: saddo2.i64: ; RV32: # %bb.0: # %entry -; RV32-NEXT: addi a3, a0, 4 -; RV32-NEXT: sltu a0, a3, a0 -; RV32-NEXT: add a4, a1, a0 -; RV32-NEXT: addi a6, zero, -1 -; RV32-NEXT: addi a0, zero, 1 -; RV32-NEXT: addi a5, zero, 1 -; RV32-NEXT: beqz a4, .LBB5_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slt a5, a6, a4 -; RV32-NEXT: .LBB5_2: # %entry -; RV32-NEXT: beqz a1, .LBB5_4 -; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: slt a0, a6, a1 -; RV32-NEXT: .LBB5_4: # %entry -; RV32-NEXT: xor a1, a0, a5 -; RV32-NEXT: snez a1, a1 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: slt a3, a3, a1 +; RV32-NEXT: addi a4, a0, 4 +; RV32-NEXT: sltu a0, a4, a0 +; RV32-NEXT: add a5, a1, a0 +; RV32-NEXT: slti a0, a5, 0 +; RV32-NEXT: slti a1, a1, 0 +; RV32-NEXT: xor a0, a1, a0 ; RV32-NEXT: snez a0, a0 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: sw a3, 0(a2) -; RV32-NEXT: sw a4, 4(a2) +; RV32-NEXT: and a0, a3, a0 +; RV32-NEXT: sw a4, 0(a2) +; RV32-NEXT: sw a5, 4(a2) ; RV32-NEXT: ret ; ; RV64-LABEL: saddo2.i64: @@ -210,21 +189,11 @@ ; RV32-NEXT: sltu a0, a3, a0 ; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: addi a4, a0, -1 -; RV32-NEXT: addi a6, zero, -1 -; RV32-NEXT: addi a0, zero, 1 -; RV32-NEXT: addi a5, zero, 1 -; RV32-NEXT: beqz a4, .LBB6_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slt a5, a6, a4 -; RV32-NEXT: .LBB6_2: # %entry -; RV32-NEXT: beqz a1, .LBB6_4 -; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: slt a0, a6, a1 -; RV32-NEXT: .LBB6_4: # %entry -; RV32-NEXT: xor a1, a0, a5 -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: slti a0, a4, 0 +; RV32-NEXT: slti a1, a1, 0 +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: sw a3, 0(a2) ; RV32-NEXT: sw a4, 4(a2) ; RV32-NEXT: ret @@ -360,31 +329,18 @@ ; RV32: # %bb.0: # %entry ; RV32-NEXT: sltu a6, a0, a2 ; RV32-NEXT: sub a5, a1, a3 -; RV32-NEXT: sub t0, a5, a6 -; RV32-NEXT: addi a7, zero, -1 -; RV32-NEXT: addi a6, zero, 1 -; RV32-NEXT: addi t1, zero, 1 -; RV32-NEXT: beqz t0, .LBB11_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slt t1, a7, t0 -; RV32-NEXT: .LBB11_2: # %entry -; RV32-NEXT: addi a5, zero, 1 -; RV32-NEXT: beqz a1, .LBB11_4 -; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: slt a5, a7, a1 -; RV32-NEXT: .LBB11_4: # %entry -; RV32-NEXT: xor a1, a5, t1 +; RV32-NEXT: sub a7, a5, a6 +; RV32-NEXT: slti a6, a7, 0 +; RV32-NEXT: slti a1, a1, 0 +; RV32-NEXT: xor a5, a1, a6 +; RV32-NEXT: snez a5, a5 +; RV32-NEXT: slti a3, a3, 0 +; RV32-NEXT: xor a1, a1, a3 ; RV32-NEXT: snez a1, a1 -; RV32-NEXT: beqz a3, .LBB11_6 -; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: slt a6, a7, a3 -; RV32-NEXT: .LBB11_6: # %entry -; RV32-NEXT: xor a3, a5, a6 -; RV32-NEXT: snez a3, a3 -; RV32-NEXT: and a1, a3, a1 +; RV32-NEXT: and a1, a1, a5 ; RV32-NEXT: sub a0, a0, a2 ; RV32-NEXT: sw a0, 0(a4) -; RV32-NEXT: sw t0, 4(a4) +; RV32-NEXT: sw a7, 4(a4) ; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret ; @@ -819,33 +775,20 @@ ; RV32-NEXT: add a4, a1, a3 ; RV32-NEXT: add a5, a0, a2 ; RV32-NEXT: sltu a5, a5, a0 -; RV32-NEXT: add a5, a4, a5 -; RV32-NEXT: addi a6, zero, -1 -; RV32-NEXT: addi a7, zero, 1 -; RV32-NEXT: addi a4, zero, 1 -; RV32-NEXT: beqz a5, .LBB24_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slt a4, a6, a5 -; RV32-NEXT: .LBB24_2: # %entry -; RV32-NEXT: addi a5, zero, 1 -; RV32-NEXT: beqz a1, .LBB24_4 -; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: slt a5, a6, a1 -; RV32-NEXT: .LBB24_4: # %entry +; RV32-NEXT: add a4, a4, a5 +; RV32-NEXT: slti a4, a4, 0 +; RV32-NEXT: slti a5, a1, 0 ; RV32-NEXT: xor a4, a5, a4 -; RV32-NEXT: snez a4, a4 -; RV32-NEXT: beqz a3, .LBB24_6 -; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: slt a7, a6, a3 -; RV32-NEXT: .LBB24_6: # %entry -; RV32-NEXT: xor a5, a5, a7 -; RV32-NEXT: seqz a5, a5 -; RV32-NEXT: and a4, a5, a4 -; RV32-NEXT: bnez a4, .LBB24_8 -; RV32-NEXT: # %bb.7: # %entry +; RV32-NEXT: snez a6, a4 +; RV32-NEXT: slti a4, a3, 0 +; RV32-NEXT: xor a4, a5, a4 +; RV32-NEXT: seqz a4, a4 +; RV32-NEXT: and a4, a4, a6 +; RV32-NEXT: bnez a4, .LBB24_2 +; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: mv a0, a2 ; RV32-NEXT: mv a1, a3 -; RV32-NEXT: .LBB24_8: # %entry +; RV32-NEXT: .LBB24_2: # %entry ; RV32-NEXT: ret ; ; RV64-LABEL: saddo.select.i64: @@ -872,28 +815,15 @@ ; RV32-NEXT: add a4, a1, a3 ; RV32-NEXT: add a2, a0, a2 ; RV32-NEXT: sltu a0, a2, a0 -; RV32-NEXT: add a4, a4, a0 -; RV32-NEXT: addi a2, zero, -1 -; RV32-NEXT: addi a0, zero, 1 -; RV32-NEXT: addi a5, zero, 1 -; RV32-NEXT: beqz a4, .LBB25_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slt a5, a2, a4 -; RV32-NEXT: .LBB25_2: # %entry -; RV32-NEXT: addi a4, zero, 1 -; RV32-NEXT: beqz a1, .LBB25_4 -; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: slt a4, a2, a1 -; RV32-NEXT: .LBB25_4: # %entry -; RV32-NEXT: xor a1, a4, a5 -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: beqz a3, .LBB25_6 -; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: slt a0, a2, a3 -; RV32-NEXT: .LBB25_6: # %entry -; RV32-NEXT: xor a0, a4, a0 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: add a0, a4, a0 +; RV32-NEXT: slti a0, a0, 0 +; RV32-NEXT: slti a1, a1, 0 +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: slti a2, a3, 0 +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: seqz a1, a1 +; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: ret ; @@ -1096,33 +1026,20 @@ ; RV32: # %bb.0: # %entry ; RV32-NEXT: sltu a4, a0, a2 ; RV32-NEXT: sub a5, a1, a3 -; RV32-NEXT: sub a5, a5, a4 -; RV32-NEXT: addi a6, zero, -1 -; RV32-NEXT: addi a7, zero, 1 -; RV32-NEXT: addi a4, zero, 1 -; RV32-NEXT: beqz a5, .LBB32_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slt a4, a6, a5 -; RV32-NEXT: .LBB32_2: # %entry -; RV32-NEXT: addi a5, zero, 1 -; RV32-NEXT: beqz a1, .LBB32_4 -; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: slt a5, a6, a1 -; RV32-NEXT: .LBB32_4: # %entry +; RV32-NEXT: sub a4, a5, a4 +; RV32-NEXT: slti a4, a4, 0 +; RV32-NEXT: slti a5, a1, 0 +; RV32-NEXT: xor a4, a5, a4 +; RV32-NEXT: snez a6, a4 +; RV32-NEXT: slti a4, a3, 0 ; RV32-NEXT: xor a4, a5, a4 ; RV32-NEXT: snez a4, a4 -; RV32-NEXT: beqz a3, .LBB32_6 -; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: slt a7, a6, a3 -; RV32-NEXT: .LBB32_6: # %entry -; RV32-NEXT: xor a5, a5, a7 -; RV32-NEXT: snez a5, a5 -; RV32-NEXT: and a4, a5, a4 -; RV32-NEXT: bnez a4, .LBB32_8 -; RV32-NEXT: # %bb.7: # %entry +; RV32-NEXT: and a4, a4, a6 +; RV32-NEXT: bnez a4, .LBB32_2 +; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: mv a0, a2 ; RV32-NEXT: mv a1, a3 -; RV32-NEXT: .LBB32_8: # %entry +; RV32-NEXT: .LBB32_2: # %entry ; RV32-NEXT: ret ; ; RV64-LABEL: ssubo.select.i64: @@ -1148,28 +1065,15 @@ ; RV32: # %bb.0: # %entry ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: sub a2, a1, a3 -; RV32-NEXT: sub a4, a2, a0 -; RV32-NEXT: addi a2, zero, -1 -; RV32-NEXT: addi a0, zero, 1 -; RV32-NEXT: addi a5, zero, 1 -; RV32-NEXT: beqz a4, .LBB33_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slt a5, a2, a4 -; RV32-NEXT: .LBB33_2: # %entry -; RV32-NEXT: addi a4, zero, 1 -; RV32-NEXT: beqz a1, .LBB33_4 -; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: slt a4, a2, a1 -; RV32-NEXT: .LBB33_4: # %entry -; RV32-NEXT: xor a1, a4, a5 -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: beqz a3, .LBB33_6 -; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: slt a0, a2, a3 -; RV32-NEXT: .LBB33_6: # %entry -; RV32-NEXT: xor a0, a4, a0 +; RV32-NEXT: sub a0, a2, a0 +; RV32-NEXT: slti a0, a0, 0 +; RV32-NEXT: slti a1, a1, 0 +; RV32-NEXT: xor a0, a1, a0 ; RV32-NEXT: snez a0, a0 -; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: slti a2, a3, 0 +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: ret ; @@ -1657,33 +1561,20 @@ ; RV32-NEXT: add a4, a1, a3 ; RV32-NEXT: add a2, a0, a2 ; RV32-NEXT: sltu a0, a2, a0 -; RV32-NEXT: add a4, a4, a0 -; RV32-NEXT: addi a2, zero, -1 -; RV32-NEXT: addi a0, zero, 1 -; RV32-NEXT: addi a5, zero, 1 -; RV32-NEXT: beqz a4, .LBB47_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slt a5, a2, a4 -; RV32-NEXT: .LBB47_2: # %entry -; RV32-NEXT: addi a4, zero, 1 -; RV32-NEXT: beqz a1, .LBB47_4 -; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: slt a4, a2, a1 -; RV32-NEXT: .LBB47_4: # %entry -; RV32-NEXT: xor a1, a4, a5 -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: beqz a3, .LBB47_6 -; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: slt a0, a2, a3 -; RV32-NEXT: .LBB47_6: # %entry -; RV32-NEXT: xor a0, a4, a0 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: beqz a0, .LBB47_8 -; RV32-NEXT: # %bb.7: # %overflow +; RV32-NEXT: add a0, a4, a0 +; RV32-NEXT: slti a0, a0, 0 +; RV32-NEXT: slti a1, a1, 0 +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: slti a2, a3, 0 +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: seqz a1, a1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: beqz a0, .LBB47_2 +; RV32-NEXT: # %bb.1: # %overflow ; RV32-NEXT: mv a0, zero ; RV32-NEXT: ret -; RV32-NEXT: .LBB47_8: # %continue +; RV32-NEXT: .LBB47_2: # %continue ; RV32-NEXT: addi a0, zero, 1 ; RV32-NEXT: ret ; @@ -1842,33 +1733,20 @@ ; RV32: # %bb.0: # %entry ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: sub a2, a1, a3 -; RV32-NEXT: sub a4, a2, a0 -; RV32-NEXT: addi a2, zero, -1 -; RV32-NEXT: addi a0, zero, 1 -; RV32-NEXT: addi a5, zero, 1 -; RV32-NEXT: beqz a4, .LBB51_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slt a5, a2, a4 -; RV32-NEXT: .LBB51_2: # %entry -; RV32-NEXT: addi a4, zero, 1 -; RV32-NEXT: beqz a1, .LBB51_4 -; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: slt a4, a2, a1 -; RV32-NEXT: .LBB51_4: # %entry -; RV32-NEXT: xor a1, a4, a5 -; RV32-NEXT: snez a1, a1 -; RV32-NEXT: beqz a3, .LBB51_6 -; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: slt a0, a2, a3 -; RV32-NEXT: .LBB51_6: # %entry -; RV32-NEXT: xor a0, a4, a0 +; RV32-NEXT: sub a0, a2, a0 +; RV32-NEXT: slti a0, a0, 0 +; RV32-NEXT: slti a1, a1, 0 +; RV32-NEXT: xor a0, a1, a0 ; RV32-NEXT: snez a0, a0 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: beqz a0, .LBB51_8 -; RV32-NEXT: # %bb.7: # %overflow +; RV32-NEXT: slti a2, a3, 0 +; RV32-NEXT: xor a1, a1, a2 +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: beqz a0, .LBB51_2 +; RV32-NEXT: # %bb.1: # %overflow ; RV32-NEXT: mv a0, zero ; RV32-NEXT: ret -; RV32-NEXT: .LBB51_8: # %continue +; RV32-NEXT: .LBB51_2: # %continue ; RV32-NEXT: addi a0, zero, 1 ; RV32-NEXT: ret ; @@ -2081,25 +1959,14 @@ ; RV32-NEXT: add a2, a0, a0 ; RV32-NEXT: sltu a0, a2, a0 ; RV32-NEXT: add a2, a1, a1 -; RV32-NEXT: add a4, a2, a0 -; RV32-NEXT: addi a2, zero, -1 -; RV32-NEXT: addi a0, zero, 1 -; RV32-NEXT: addi a3, zero, 1 -; RV32-NEXT: bnez a4, .LBB56_4 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: bnez a1, .LBB56_5 -; RV32-NEXT: .LBB56_2: # %entry -; RV32-NEXT: beq a0, a3, .LBB56_6 -; RV32-NEXT: .LBB56_3: # %overflow +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: slti a0, a0, 0 +; RV32-NEXT: slti a1, a1, 0 +; RV32-NEXT: beq a1, a0, .LBB56_2 +; RV32-NEXT: # %bb.1: # %overflow ; RV32-NEXT: mv a0, zero ; RV32-NEXT: ret -; RV32-NEXT: .LBB56_4: # %entry -; RV32-NEXT: slt a3, a2, a4 -; RV32-NEXT: beqz a1, .LBB56_2 -; RV32-NEXT: .LBB56_5: # %entry -; RV32-NEXT: slt a0, a2, a1 -; RV32-NEXT: bne a0, a3, .LBB56_3 -; RV32-NEXT: .LBB56_6: # %continue +; RV32-NEXT: .LBB56_2: # %continue ; RV32-NEXT: addi a0, zero, 1 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll b/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll --- a/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll +++ b/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll @@ -34,64 +34,58 @@ define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: sadd_int64_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} ; CHECK-NEXT: vmov r0, s7 ; CHECK-NEXT: vmov r2, s3 ; CHECK-NEXT: vmov lr, s6 -; CHECK-NEXT: vmov r5, s4 -; CHECK-NEXT: vmov r6, s0 -; CHECK-NEXT: cmp.w r0, #-1 -; CHECK-NEXT: cset r1, gt -; CHECK-NEXT: cmp.w r2, #-1 -; CHECK-NEXT: cset r3, gt +; CHECK-NEXT: vmov r4, s1 +; CHECK-NEXT: vmov r6, s4 +; CHECK-NEXT: vmov r7, s0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: cset r1, mi +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: cset r3, mi ; CHECK-NEXT: cmp r3, r1 ; CHECK-NEXT: vmov r1, s2 ; CHECK-NEXT: cset r12, eq ; CHECK-NEXT: adds.w r1, r1, lr -; CHECK-NEXT: adcs r0, r2 -; CHECK-NEXT: cmp.w r0, #-1 -; CHECK-NEXT: cset r2, gt +; CHECK-NEXT: adcs.w lr, r2, r0 +; CHECK-NEXT: cset r2, mi ; CHECK-NEXT: cmp r3, r2 +; CHECK-NEXT: cset r3, ne +; CHECK-NEXT: and.w r3, r3, r12 +; CHECK-NEXT: ands r12, r3, #1 ; CHECK-NEXT: vmov r3, s5 -; CHECK-NEXT: cset r2, ne -; CHECK-NEXT: and.w r2, r2, r12 -; CHECK-NEXT: ands r12, r2, #1 -; CHECK-NEXT: vmov r2, s1 ; CHECK-NEXT: it ne -; CHECK-NEXT: asrne r1, r0, #31 -; CHECK-NEXT: cmp.w r3, #-1 -; CHECK-NEXT: cset lr, gt -; CHECK-NEXT: cmp.w r2, #-1 -; CHECK-NEXT: cset r4, gt -; CHECK-NEXT: cmp r4, lr -; CHECK-NEXT: cset lr, eq -; CHECK-NEXT: adds r5, r5, r6 -; CHECK-NEXT: adcs r2, r3 -; CHECK-NEXT: mvn r6, #-2147483648 -; CHECK-NEXT: cmp.w r2, #-1 -; CHECK-NEXT: cset r3, gt -; CHECK-NEXT: cmp r4, r3 -; CHECK-NEXT: cset r3, ne -; CHECK-NEXT: and.w r3, r3, lr -; CHECK-NEXT: ands r3, r3, #1 +; CHECK-NEXT: asrne.w r1, lr, #31 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: cset r0, mi +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: cset r5, mi +; CHECK-NEXT: cmp r5, r0 +; CHECK-NEXT: cset r0, eq +; CHECK-NEXT: adds r6, r6, r7 +; CHECK-NEXT: adcs r3, r4 +; CHECK-NEXT: cset r4, mi +; CHECK-NEXT: cmp r5, r4 +; CHECK-NEXT: cset r5, ne +; CHECK-NEXT: ands r0, r5 +; CHECK-NEXT: ands r0, r0, #1 ; CHECK-NEXT: it ne -; CHECK-NEXT: asrne r5, r2, #31 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r5, r1 -; CHECK-NEXT: cset r1, mi -; CHECK-NEXT: tst.w r1, #1 -; CHECK-NEXT: cinv r1, r6, eq +; CHECK-NEXT: asrne r6, r3, #31 +; CHECK-NEXT: vmov q0[2], q0[0], r6, r1 +; CHECK-NEXT: mvn r1, #-2147483648 +; CHECK-NEXT: tst.w r2, #1 +; CHECK-NEXT: cinv r2, r1, eq ; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: csel r0, r1, r0, ne -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: cset r1, mi -; CHECK-NEXT: tst.w r1, #1 -; CHECK-NEXT: cinv r1, r6, eq -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEXT: csel r2, r2, lr, ne +; CHECK-NEXT: tst.w r4, #1 +; CHECK-NEXT: cinv r1, r1, eq +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csel r0, r1, r3, ne +; CHECK-NEXT: vmov q0[3], q0[1], r0, r2 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} entry: %0 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2) ret <2 x i64> %0 @@ -199,64 +193,58 @@ define arm_aapcs_vfpcc <2 x i64> @ssub_int64_t(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: ssub_int64_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} ; CHECK-NEXT: vmov r0, s7 ; CHECK-NEXT: vmov r2, s3 ; CHECK-NEXT: vmov lr, s6 -; CHECK-NEXT: vmov r5, s4 -; CHECK-NEXT: vmov r6, s0 -; CHECK-NEXT: cmp.w r0, #-1 -; CHECK-NEXT: cset r1, gt -; CHECK-NEXT: cmp.w r2, #-1 -; CHECK-NEXT: cset r3, gt +; CHECK-NEXT: vmov r4, s1 +; CHECK-NEXT: vmov r6, s4 +; CHECK-NEXT: vmov r7, s0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: cset r1, mi +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: cset r3, mi ; CHECK-NEXT: cmp r3, r1 ; CHECK-NEXT: vmov r1, s2 ; CHECK-NEXT: cset r12, ne ; CHECK-NEXT: subs.w r1, r1, lr -; CHECK-NEXT: sbc.w r0, r2, r0 -; CHECK-NEXT: cmp.w r0, #-1 -; CHECK-NEXT: cset r2, gt +; CHECK-NEXT: sbcs.w lr, r2, r0 +; CHECK-NEXT: cset r2, mi ; CHECK-NEXT: cmp r3, r2 +; CHECK-NEXT: cset r3, ne +; CHECK-NEXT: and.w r3, r3, r12 +; CHECK-NEXT: ands r12, r3, #1 ; CHECK-NEXT: vmov r3, s5 -; CHECK-NEXT: cset r2, ne -; CHECK-NEXT: and.w r2, r2, r12 -; CHECK-NEXT: ands r12, r2, #1 -; CHECK-NEXT: vmov r2, s1 ; CHECK-NEXT: it ne -; CHECK-NEXT: asrne r1, r0, #31 -; CHECK-NEXT: cmp.w r3, #-1 -; CHECK-NEXT: cset lr, gt -; CHECK-NEXT: cmp.w r2, #-1 -; CHECK-NEXT: cset r4, gt -; CHECK-NEXT: cmp r4, lr -; CHECK-NEXT: cset lr, ne -; CHECK-NEXT: subs r5, r6, r5 -; CHECK-NEXT: sbcs r2, r3 -; CHECK-NEXT: mvn r6, #-2147483648 -; CHECK-NEXT: cmp.w r2, #-1 -; CHECK-NEXT: cset r3, gt -; CHECK-NEXT: cmp r4, r3 -; CHECK-NEXT: cset r3, ne -; CHECK-NEXT: and.w r3, r3, lr -; CHECK-NEXT: ands r3, r3, #1 +; CHECK-NEXT: asrne.w r1, lr, #31 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: cset r0, mi +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: cset r5, mi +; CHECK-NEXT: cmp r5, r0 +; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: subs r6, r7, r6 +; CHECK-NEXT: sbcs.w r3, r4, r3 +; CHECK-NEXT: cset r4, mi +; CHECK-NEXT: cmp r5, r4 +; CHECK-NEXT: cset r5, ne +; CHECK-NEXT: ands r0, r5 +; CHECK-NEXT: ands r0, r0, #1 ; CHECK-NEXT: it ne -; CHECK-NEXT: asrne r5, r2, #31 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r5, r1 -; CHECK-NEXT: cset r1, mi -; CHECK-NEXT: tst.w r1, #1 -; CHECK-NEXT: cinv r1, r6, eq +; CHECK-NEXT: asrne r6, r3, #31 +; CHECK-NEXT: vmov q0[2], q0[0], r6, r1 +; CHECK-NEXT: mvn r1, #-2147483648 +; CHECK-NEXT: tst.w r2, #1 +; CHECK-NEXT: cinv r2, r1, eq ; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: csel r0, r1, r0, ne -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: cset r1, mi -; CHECK-NEXT: tst.w r1, #1 -; CHECK-NEXT: cinv r1, r6, eq -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEXT: csel r2, r2, lr, ne +; CHECK-NEXT: tst.w r4, #1 +; CHECK-NEXT: cinv r1, r1, eq +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csel r0, r1, r3, ne +; CHECK-NEXT: vmov q0[3], q0[1], r0, r2 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} entry: %0 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2) ret <2 x i64> %0