diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -283,6 +283,16 @@ ADDCARRY, SUBCARRY, + /// Carry-using overflow-aware nodes for multiple precision addition and + /// subtraction. These nodes take three operands: The first two are normal lhs + /// and rhs to the add or sub, and the third is a boolean indicating if there + /// is an incoming carry. They produce two results: the normal result of the + /// add or sub, and a boolean that indicates if an overflow occurred (*not* + /// flag, because it may be a store to memory, etc.). If the type of the + /// boolean is not i1 then the high bits conform to getBooleanContents. + SADDO_CARRY, + SSUBO_CARRY, + /// RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition. /// These nodes take two operands: the normal LHS and RHS to the add. They /// produce two results: the normal result of the add, and a boolean that diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -411,9 +411,11 @@ SDValue visitSUBO(SDNode *N); SDValue visitADDE(SDNode *N); SDValue visitADDCARRY(SDNode *N); + SDValue visitSADDO_CARRY(SDNode *N); SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N); SDValue visitSUBE(SDNode *N); SDValue visitSUBCARRY(SDNode *N); + SDValue visitSSUBO_CARRY(SDNode *N); SDValue visitMUL(SDNode *N); SDValue visitMULFIX(SDNode *N); SDValue useDivRem(SDNode *N); @@ -1600,8 +1602,10 @@ case ISD::USUBO: return visitSUBO(N); case ISD::ADDE: return visitADDE(N); case ISD::ADDCARRY: return visitADDCARRY(N); + case ISD::SADDO_CARRY: return visitSADDO_CARRY(N); case ISD::SUBE: return visitSUBE(N); case ISD::SUBCARRY: return visitSUBCARRY(N); + case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N); case 
ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: @@ -2836,6 +2840,28 @@ return SDValue(); } +SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + SDLoc DL(N); + + // canonicalize constant to RHS + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (N0C && !N1C) + return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn); + + // fold (saddo_carry x, y, false) -> (saddo x, y) + if (isNullConstant(CarryIn)) { + if (!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0))) + return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1); + } + + return SDValue(); +} + /** * If we are facing some sort of diamond carry propapagtion pattern try to * break it up to generate something like: @@ -3517,6 +3543,21 @@ return SDValue(); } +SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + + // fold (ssubo_carry x, y, false) -> (ssubo x, y) + if (isNullConstant(CarryIn)) { + if (!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0))) + return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1); + } + + return SDValue(); +} + // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and // UMULFIXSAT here. 
SDValue DAGCombiner::visitMULFIX(SDNode *N) { diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -152,6 +152,9 @@ case ISD::ADDCARRY: case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break; + case ISD::SADDO_CARRY: + case ISD::SSUBO_CARRY: Res = PromoteIntRes_SADDSUBO_CARRY(N, ResNo); break; + case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: @@ -1288,6 +1291,12 @@ return SDValue(Res.getNode(), 0); } +SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO_CARRY(SDNode *N, + unsigned ResNo) { + assert(ResNo == 1 && "Don't know how to promote other results yet."); + return PromoteIntRes_Overflow(N); +} + SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) { SDValue Op0 = SExtPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0); @@ -1470,6 +1479,8 @@ case ISD::ROTL: case ISD::ROTR: Res = PromoteIntOp_Shift(N); break; + case ISD::SADDO_CARRY: + case ISD::SSUBO_CARRY: case ISD::ADDCARRY: case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break; @@ -2087,6 +2098,9 @@ case ISD::ADDCARRY: case ISD::SUBCARRY: ExpandIntRes_ADDSUBCARRY(N, Lo, Hi); break; + case ISD::SADDO_CARRY: + case ISD::SSUBO_CARRY: ExpandIntRes_SADDSUBO_CARRY(N, Lo, Hi); break; + case ISD::SHL: case ISD::SRA: case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break; @@ -2710,6 +2724,26 @@ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); } +void DAGTypeLegalizer::ExpandIntRes_SADDSUBO_CARRY(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. 
+ SDValue LHSL, LHSH, RHSL, RHSH; + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1)); + + // We need to use an unsigned carry op for the lo part. + unsigned CarryOp = N->getOpcode() == ISD::SADDO_CARRY ? ISD::ADDCARRY + : ISD::SUBCARRY; + Lo = DAG.getNode(CarryOp, dl, VTList, { LHSL, RHSL, N->getOperand(2) }); + Hi = DAG.getNode(N->getOpcode(), dl, VTList, { LHSH, RHSH, Lo.getValue(1) }); + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -3515,40 +3549,66 @@ SDValue RHS = Node->getOperand(1); SDLoc dl(Node); - // Expand the result by simply replacing it with the equivalent - // non-overflow-checking operation. - SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? - ISD::ADD : ISD::SUB, dl, LHS.getValueType(), - LHS, RHS); - SplitInteger(Sum, Lo, Hi); + SDValue Ovf; - // Compute the overflow. 
- // - // LHSSign -> LHS >= 0 - // RHSSign -> RHS >= 0 - // SumSign -> Sum >= 0 - // - // Add: - // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) - // Sub: - // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) - // - EVT OType = Node->getValueType(1); - SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); + unsigned CarryOp; + switch(Node->getOpcode()) { + default: llvm_unreachable("Node has unexpected Opcode"); + case ISD::SADDO: CarryOp = ISD::SADDO_CARRY; break; + case ISD::SSUBO: CarryOp = ISD::SSUBO_CARRY; break; + } - SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); - SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); - SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, - Node->getOpcode() == ISD::SADDO ? - ISD::SETEQ : ISD::SETNE); + bool HasCarryOp = TLI.isOperationLegalOrCustom( + CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType())); - SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); - SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + if (HasCarryOp) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + GetExpandedInteger(LHS, LHSL, LHSH); + GetExpandedInteger(RHS, RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), Node->getValueType(1)); + + Lo = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::UADDO : ISD::USUBO, dl, VTList, { LHSL, RHSL }); + Hi = DAG.getNode(CarryOp, dl, VTList, { LHSH, RHSH, Lo.getValue(1) }); - SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + Ovf = Hi.getValue(1); + } else { + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Compute the overflow. 
+ // + // LHSSign -> LHS >= 0 + // RHSSign -> RHS >= 0 + // SumSign -> Sum >= 0 + // + // Add: + // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // + EVT OType = Node->getValueType(1); + SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); + + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, + Node->getOpcode() == ISD::SADDO ? + ISD::SETEQ : ISD::SETNE); + + SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); + SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + + Ovf = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + } // Use the calculated overflow everywhere. - ReplaceValueWith(SDValue(Node, 1), Cmp); + ReplaceValueWith(SDValue(Node, 1), Ovf); } void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -337,6 +337,7 @@ SDValue PromoteIntRes_TRUNCATE(SDNode *N); SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_SADDSUBO_CARRY(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_UNDEF(SDNode *N); SDValue PromoteIntRes_VAARG(SDNode *N); SDValue PromoteIntRes_VSCALE(SDNode *N); @@ -429,6 +430,7 @@ void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SADDSUBO_CARRY (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, 
SDValue &Hi); void ExpandIntRes_PARITY (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -293,6 +293,7 @@ case ISD::ADDC: return "addc"; case ISD::ADDE: return "adde"; case ISD::ADDCARRY: return "addcarry"; + case ISD::SADDO_CARRY: return "saddo_carry"; case ISD::SADDO: return "saddo"; case ISD::UADDO: return "uaddo"; case ISD::SSUBO: return "ssubo"; @@ -302,6 +303,7 @@ case ISD::SUBC: return "subc"; case ISD::SUBE: return "sube"; case ISD::SUBCARRY: return "subcarry"; + case ISD::SSUBO_CARRY: return "ssubo_carry"; case ISD::SHL_PARTS: return "shl_parts"; case ISD::SRA_PARTS: return "sra_parts"; case ISD::SRL_PARTS: return "srl_parts"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -680,6 +680,8 @@ setOperationAction(ISD::ADDCARRY, VT, Expand); setOperationAction(ISD::SUBCARRY, VT, Expand); setOperationAction(ISD::SETCCCARRY, VT, Expand); + setOperationAction(ISD::SADDO_CARRY, VT, Expand); + setOperationAction(ISD::SSUBO_CARRY, VT, Expand); // ADDC/ADDE/SUBC/SUBE default to expand. 
setOperationAction(ISD::ADDC, VT, Expand); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1914,6 +1914,8 @@ setOperationAction(ISD::ADDCARRY, VT, Custom); setOperationAction(ISD::SUBCARRY, VT, Custom); setOperationAction(ISD::SETCCCARRY, VT, Custom); + setOperationAction(ISD::SADDO_CARRY, VT, Custom); + setOperationAction(ISD::SSUBO_CARRY, VT, Custom); } if (!Subtarget.is64Bit()) { @@ -29241,6 +29243,7 @@ static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) { SDNode *N = Op.getNode(); MVT VT = N->getSimpleValueType(0); + unsigned Opc = Op.getOpcode(); // Let legalize expand this if it isn't a legal type yet. if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) @@ -29255,11 +29258,14 @@ Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), Carry, DAG.getAllOnesConstant(DL, CarryVT)); - unsigned Opc = Op.getOpcode() == ISD::ADDCARRY ? X86ISD::ADC : X86ISD::SBB; - SDValue Sum = DAG.getNode(Opc, DL, VTs, Op.getOperand(0), - Op.getOperand(1), Carry.getValue(1)); + bool IsAdd = Opc == ISD::ADDCARRY || Opc == ISD::SADDO_CARRY; + SDValue Sum = DAG.getNode(IsAdd ? X86ISD::ADC : X86ISD::SBB, DL, VTs, + Op.getOperand(0), Op.getOperand(1), + Carry.getValue(1)); - SDValue SetCC = getSETCC(X86::COND_B, Sum.getValue(1), DL, DAG); + bool IsSigned = Opc == ISD::SADDO_CARRY || Opc == ISD::SSUBO_CARRY; + SDValue SetCC = getSETCC(IsSigned ? 
X86::COND_O : X86::COND_B, + Sum.getValue(1), DL, DAG); if (N->getValueType(1) == MVT::i1) SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); @@ -29784,6 +29790,8 @@ case ISD::UMULO: return LowerXALUO(Op, DAG); case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG); case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG); + case ISD::SADDO_CARRY: + case ISD::SSUBO_CARRY: case ISD::ADDCARRY: case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG); case ISD::ADD: diff --git a/llvm/test/CodeGen/X86/known-bits.ll b/llvm/test/CodeGen/X86/known-bits.ll --- a/llvm/test/CodeGen/X86/known-bits.ll +++ b/llvm/test/CodeGen/X86/known-bits.ll @@ -139,26 +139,12 @@ define {i32, i1} @knownbits_uaddo_saddo(i64 %a0, i64 %a1) nounwind { ; X32-LABEL: knownbits_uaddo_saddo: ; X32: # %bb.0: -; X32-NEXT: pushl %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: addl %eax, %edx -; X32-NEXT: setb %bl -; X32-NEXT: testl %eax, %eax -; X32-NEXT: setns %al -; X32-NEXT: testl %ecx, %ecx -; X32-NEXT: setns %cl -; X32-NEXT: cmpb %al, %cl -; X32-NEXT: sete %al -; X32-NEXT: testl %edx, %edx -; X32-NEXT: setns %dl -; X32-NEXT: cmpb %dl, %cl -; X32-NEXT: setne %dl -; X32-NEXT: andb %al, %dl -; X32-NEXT: orb %bl, %dl +; X32-NEXT: addl {{[0-9]+}}(%esp), %eax +; X32-NEXT: setb %al +; X32-NEXT: seto %dl +; X32-NEXT: orb %al, %dl ; X32-NEXT: xorl %eax, %eax -; X32-NEXT: popl %ebx ; X32-NEXT: retl ; ; X64-LABEL: knownbits_uaddo_saddo: @@ -191,20 +177,10 @@ ; X32-LABEL: knownbits_usubo_ssubo: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: cmpl %eax, %ecx -; X32-NEXT: setb %dh -; X32-NEXT: setns %dl -; X32-NEXT: testl %ecx, %ecx -; X32-NEXT: setns %cl -; X32-NEXT: cmpb %dl, %cl -; X32-NEXT: setne %ch -; X32-NEXT: testl %eax, %eax -; X32-NEXT: setns %al -; X32-NEXT: cmpb %al, %cl -; X32-NEXT: setne %dl -; X32-NEXT: andb %ch, %dl -; X32-NEXT: orb 
%dh, %dl +; X32-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; X32-NEXT: setb %al +; X32-NEXT: seto %dl +; X32-NEXT: orb %al, %dl ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/sadd_sat.ll b/llvm/test/CodeGen/X86/sadd_sat.ll --- a/llvm/test/CodeGen/X86/sadd_sat.ll +++ b/llvm/test/CodeGen/X86/sadd_sat.ll @@ -42,38 +42,25 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; X86-LABEL: func2: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: addl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: adcl %esi, %ebp -; X86-NEXT: movl %ebp, %eax +; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi +; X86-NEXT: seto %bl +; X86-NEXT: movl %esi, %eax ; X86-NEXT: sarl $31, %eax -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: testl %ebp, %ebp -; X86-NEXT: setns %cl -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF -; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: setns %bl -; X86-NEXT: cmpb %cl, %bl -; X86-NEXT: setne %cl +; X86-NEXT: testb %bl, %bl +; X86-NEXT: cmovel %ecx, %eax +; X86-NEXT: xorl %edx, %edx ; X86-NEXT: testl %esi, %esi -; X86-NEXT: setns %ch -; X86-NEXT: cmpb %ch, %bl -; X86-NEXT: sete %ch -; X86-NEXT: testb %cl, %ch -; X86-NEXT: cmovel %ebp, %edx -; X86-NEXT: cmovel %edi, %eax +; X86-NEXT: setns %dl +; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF +; X86-NEXT: testb %bl, %bl +; X86-NEXT: cmovel %esi, %edx ; X86-NEXT: popl %esi -; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: func2: diff --git a/llvm/test/CodeGen/X86/sadd_sat_plus.ll b/llvm/test/CodeGen/X86/sadd_sat_plus.ll --- a/llvm/test/CodeGen/X86/sadd_sat_plus.ll +++ b/llvm/test/CodeGen/X86/sadd_sat_plus.ll @@ -44,38 +44,25 @@ define 
i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; X86-LABEL: func64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: addl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: adcl %esi, %ebp -; X86-NEXT: movl %ebp, %eax +; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi +; X86-NEXT: seto %bl +; X86-NEXT: movl %esi, %eax ; X86-NEXT: sarl $31, %eax -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: testl %ebp, %ebp -; X86-NEXT: setns %cl -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF -; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: setns %bl -; X86-NEXT: cmpb %cl, %bl -; X86-NEXT: setne %cl +; X86-NEXT: testb %bl, %bl +; X86-NEXT: cmovel %ecx, %eax +; X86-NEXT: xorl %edx, %edx ; X86-NEXT: testl %esi, %esi -; X86-NEXT: setns %ch -; X86-NEXT: cmpb %ch, %bl -; X86-NEXT: sete %ch -; X86-NEXT: testb %cl, %ch -; X86-NEXT: cmovel %ebp, %edx -; X86-NEXT: cmovel %edi, %eax +; X86-NEXT: setns %dl +; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF +; X86-NEXT: testb %bl, %bl +; X86-NEXT: cmovel %esi, %edx ; X86-NEXT: popl %esi -; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: func64: diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll --- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll @@ -1940,124 +1940,78 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; SSE-LABEL: v2i128: ; SSE: # %bb.0: -; SSE-NEXT: pushq %r15 -; SSE-NEXT: pushq %r14 -; SSE-NEXT: pushq %r13 -; SSE-NEXT: pushq %r12 ; SSE-NEXT: pushq %rbx ; SSE-NEXT: movq %rdi, %rax -; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11 -; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r14 ; SSE-NEXT: addq 
{{[0-9]+}}(%rsp), %rcx -; SSE-NEXT: movq %r8, %r13 -; SSE-NEXT: adcq %r14, %r13 -; SSE-NEXT: movq %r13, %r10 -; SSE-NEXT: sarq $63, %r10 -; SSE-NEXT: xorl %edi, %edi -; SSE-NEXT: testq %r13, %r13 -; SSE-NEXT: setns %dil -; SSE-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF -; SSE-NEXT: leaq (%rdi,%r12), %r15 +; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %r8 +; SSE-NEXT: seto %r10b +; SSE-NEXT: movq %r8, %rbx +; SSE-NEXT: sarq $63, %rbx +; SSE-NEXT: testb %r10b, %r10b +; SSE-NEXT: cmoveq %rcx, %rbx +; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: testq %r8, %r8 -; SSE-NEXT: setns %r8b -; SSE-NEXT: cmpb %dil, %r8b -; SSE-NEXT: setne %dil -; SSE-NEXT: testq %r14, %r14 -; SSE-NEXT: setns %bl -; SSE-NEXT: cmpb %bl, %r8b -; SSE-NEXT: sete %bl -; SSE-NEXT: testb %dil, %bl -; SSE-NEXT: cmoveq %r13, %r15 -; SSE-NEXT: cmoveq %rcx, %r10 +; SSE-NEXT: setns %cl +; SSE-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF +; SSE-NEXT: addq %r11, %rcx +; SSE-NEXT: testb %r10b, %r10b +; SSE-NEXT: cmoveq %r8, %rcx ; SSE-NEXT: addq %r9, %rsi +; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %rdx +; SSE-NEXT: seto %r8b ; SSE-NEXT: movq %rdx, %rdi -; SSE-NEXT: adcq %r11, %rdi -; SSE-NEXT: setns %bl -; SSE-NEXT: movzbl %bl, %ebx -; SSE-NEXT: addq %rbx, %r12 -; SSE-NEXT: movq %rdi, %rcx -; SSE-NEXT: sarq $63, %rcx -; SSE-NEXT: testq %r11, %r11 -; SSE-NEXT: setns %r8b +; SSE-NEXT: sarq $63, %rdi +; SSE-NEXT: testb %r8b, %r8b +; SSE-NEXT: cmoveq %rsi, %rdi +; SSE-NEXT: xorl %esi, %esi ; SSE-NEXT: testq %rdx, %rdx -; SSE-NEXT: setns %dl -; SSE-NEXT: cmpb %r8b, %dl -; SSE-NEXT: sete %r8b -; SSE-NEXT: cmpb %bl, %dl -; SSE-NEXT: setne %dl -; SSE-NEXT: testb %dl, %r8b -; SSE-NEXT: cmoveq %rsi, %rcx -; SSE-NEXT: cmoveq %rdi, %r12 -; SSE-NEXT: movq %r15, 24(%rax) -; SSE-NEXT: movq %r10, 16(%rax) -; SSE-NEXT: movq %r12, 8(%rax) -; SSE-NEXT: movq %rcx, (%rax) +; SSE-NEXT: setns %sil +; SSE-NEXT: addq %r11, %rsi +; SSE-NEXT: testb %r8b, %r8b +; SSE-NEXT: cmoveq %rdx, %rsi +; SSE-NEXT: movq %rbx, 
16(%rax) +; SSE-NEXT: movq %rdi, (%rax) +; SSE-NEXT: movq %rcx, 24(%rax) +; SSE-NEXT: movq %rsi, 8(%rax) ; SSE-NEXT: popq %rbx -; SSE-NEXT: popq %r12 -; SSE-NEXT: popq %r13 -; SSE-NEXT: popq %r14 -; SSE-NEXT: popq %r15 ; SSE-NEXT: retq ; ; AVX-LABEL: v2i128: ; AVX: # %bb.0: -; AVX-NEXT: pushq %r15 -; AVX-NEXT: pushq %r14 -; AVX-NEXT: pushq %r13 -; AVX-NEXT: pushq %r12 ; AVX-NEXT: pushq %rbx ; AVX-NEXT: movq %rdi, %rax -; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r11 -; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r14 ; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rcx -; AVX-NEXT: movq %r8, %r13 -; AVX-NEXT: adcq %r14, %r13 -; AVX-NEXT: movq %r13, %r10 -; AVX-NEXT: sarq $63, %r10 -; AVX-NEXT: xorl %edi, %edi -; AVX-NEXT: testq %r13, %r13 -; AVX-NEXT: setns %dil -; AVX-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF -; AVX-NEXT: leaq (%rdi,%r12), %r15 +; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %r8 +; AVX-NEXT: seto %r10b +; AVX-NEXT: movq %r8, %rbx +; AVX-NEXT: sarq $63, %rbx +; AVX-NEXT: testb %r10b, %r10b +; AVX-NEXT: cmoveq %rcx, %rbx +; AVX-NEXT: xorl %ecx, %ecx ; AVX-NEXT: testq %r8, %r8 -; AVX-NEXT: setns %r8b -; AVX-NEXT: cmpb %dil, %r8b -; AVX-NEXT: setne %dil -; AVX-NEXT: testq %r14, %r14 -; AVX-NEXT: setns %bl -; AVX-NEXT: cmpb %bl, %r8b -; AVX-NEXT: sete %bl -; AVX-NEXT: testb %dil, %bl -; AVX-NEXT: cmoveq %r13, %r15 -; AVX-NEXT: cmoveq %rcx, %r10 +; AVX-NEXT: setns %cl +; AVX-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF +; AVX-NEXT: addq %r11, %rcx +; AVX-NEXT: testb %r10b, %r10b +; AVX-NEXT: cmoveq %r8, %rcx ; AVX-NEXT: addq %r9, %rsi +; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rdx +; AVX-NEXT: seto %r8b ; AVX-NEXT: movq %rdx, %rdi -; AVX-NEXT: adcq %r11, %rdi -; AVX-NEXT: setns %bl -; AVX-NEXT: movzbl %bl, %ebx -; AVX-NEXT: addq %rbx, %r12 -; AVX-NEXT: movq %rdi, %rcx -; AVX-NEXT: sarq $63, %rcx -; AVX-NEXT: testq %r11, %r11 -; AVX-NEXT: setns %r8b +; AVX-NEXT: sarq $63, %rdi +; AVX-NEXT: testb %r8b, %r8b +; AVX-NEXT: cmoveq %rsi, %rdi +; AVX-NEXT: xorl 
%esi, %esi ; AVX-NEXT: testq %rdx, %rdx -; AVX-NEXT: setns %dl -; AVX-NEXT: cmpb %r8b, %dl -; AVX-NEXT: sete %r8b -; AVX-NEXT: cmpb %bl, %dl -; AVX-NEXT: setne %dl -; AVX-NEXT: testb %dl, %r8b -; AVX-NEXT: cmoveq %rsi, %rcx -; AVX-NEXT: cmoveq %rdi, %r12 -; AVX-NEXT: movq %r15, 24(%rax) -; AVX-NEXT: movq %r10, 16(%rax) -; AVX-NEXT: movq %r12, 8(%rax) -; AVX-NEXT: movq %rcx, (%rax) +; AVX-NEXT: setns %sil +; AVX-NEXT: addq %r11, %rsi +; AVX-NEXT: testb %r8b, %r8b +; AVX-NEXT: cmoveq %rdx, %rsi +; AVX-NEXT: movq %rbx, 16(%rax) +; AVX-NEXT: movq %rdi, (%rax) +; AVX-NEXT: movq %rcx, 24(%rax) +; AVX-NEXT: movq %rsi, 8(%rax) ; AVX-NEXT: popq %rbx -; AVX-NEXT: popq %r12 -; AVX-NEXT: popq %r13 -; AVX-NEXT: popq %r14 -; AVX-NEXT: popq %r15 ; AVX-NEXT: retq %z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y) ret <2 x i128> %z diff --git a/llvm/test/CodeGen/X86/ssub_sat.ll b/llvm/test/CodeGen/X86/ssub_sat.ll --- a/llvm/test/CodeGen/X86/ssub_sat.ll +++ b/llvm/test/CodeGen/X86/ssub_sat.ll @@ -38,38 +38,25 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; X86-LABEL: func2: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: subl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: sbbl %esi, %ebp -; X86-NEXT: movl %ebp, %eax +; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi +; X86-NEXT: seto %bl +; X86-NEXT: movl %esi, %eax ; X86-NEXT: sarl $31, %eax -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: testl %ebp, %ebp -; X86-NEXT: setns %cl -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF -; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: setns %bl -; X86-NEXT: cmpb %cl, %bl -; X86-NEXT: setne %cl +; X86-NEXT: testb %bl, %bl +; X86-NEXT: cmovel %ecx, %eax +; X86-NEXT: 
xorl %edx, %edx ; X86-NEXT: testl %esi, %esi -; X86-NEXT: setns %ch -; X86-NEXT: cmpb %ch, %bl -; X86-NEXT: setne %ch -; X86-NEXT: testb %cl, %ch -; X86-NEXT: cmovel %ebp, %edx -; X86-NEXT: cmovel %edi, %eax +; X86-NEXT: setns %dl +; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF +; X86-NEXT: testb %bl, %bl +; X86-NEXT: cmovel %esi, %edx ; X86-NEXT: popl %esi -; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: func2: diff --git a/llvm/test/CodeGen/X86/ssub_sat_plus.ll b/llvm/test/CodeGen/X86/ssub_sat_plus.ll --- a/llvm/test/CodeGen/X86/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/X86/ssub_sat_plus.ll @@ -40,38 +40,25 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; X86-LABEL: func64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: subl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: sbbl %esi, %ebp -; X86-NEXT: movl %ebp, %eax +; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi +; X86-NEXT: seto %bl +; X86-NEXT: movl %esi, %eax ; X86-NEXT: sarl $31, %eax -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: testl %ebp, %ebp -; X86-NEXT: setns %cl -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF -; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: setns %bl -; X86-NEXT: cmpb %cl, %bl -; X86-NEXT: setne %cl +; X86-NEXT: testb %bl, %bl +; X86-NEXT: cmovel %ecx, %eax +; X86-NEXT: xorl %edx, %edx ; X86-NEXT: testl %esi, %esi -; X86-NEXT: setns %ch -; X86-NEXT: cmpb %ch, %bl -; X86-NEXT: setne %ch -; X86-NEXT: testb %cl, %ch -; X86-NEXT: cmovel %ebp, %edx -; X86-NEXT: cmovel %edi, %eax +; X86-NEXT: setns %dl +; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF +; X86-NEXT: testb %bl, %bl +; X86-NEXT: cmovel %esi, %edx 
; X86-NEXT: popl %esi -; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: func64: diff --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll --- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll @@ -2145,124 +2145,78 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; SSE-LABEL: v2i128: ; SSE: # %bb.0: -; SSE-NEXT: pushq %r15 -; SSE-NEXT: pushq %r14 -; SSE-NEXT: pushq %r13 -; SSE-NEXT: pushq %r12 ; SSE-NEXT: pushq %rbx ; SSE-NEXT: movq %rdi, %rax -; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11 -; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r14 ; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rcx -; SSE-NEXT: movq %r8, %r13 -; SSE-NEXT: sbbq %r14, %r13 -; SSE-NEXT: movq %r13, %r10 -; SSE-NEXT: sarq $63, %r10 -; SSE-NEXT: xorl %edi, %edi -; SSE-NEXT: testq %r13, %r13 -; SSE-NEXT: setns %dil -; SSE-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF -; SSE-NEXT: leaq (%rdi,%r12), %r15 +; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %r8 +; SSE-NEXT: seto %r10b +; SSE-NEXT: movq %r8, %rbx +; SSE-NEXT: sarq $63, %rbx +; SSE-NEXT: testb %r10b, %r10b +; SSE-NEXT: cmoveq %rcx, %rbx +; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: testq %r8, %r8 -; SSE-NEXT: setns %r8b -; SSE-NEXT: cmpb %dil, %r8b -; SSE-NEXT: setne %dil -; SSE-NEXT: testq %r14, %r14 -; SSE-NEXT: setns %bl -; SSE-NEXT: cmpb %bl, %r8b -; SSE-NEXT: setne %bl -; SSE-NEXT: testb %dil, %bl -; SSE-NEXT: cmoveq %r13, %r15 -; SSE-NEXT: cmoveq %rcx, %r10 +; SSE-NEXT: setns %cl +; SSE-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF +; SSE-NEXT: addq %r11, %rcx +; SSE-NEXT: testb %r10b, %r10b +; SSE-NEXT: cmoveq %r8, %rcx ; SSE-NEXT: subq %r9, %rsi +; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx +; SSE-NEXT: seto %r8b ; SSE-NEXT: movq %rdx, %rdi -; SSE-NEXT: sbbq %r11, %rdi -; SSE-NEXT: setns %bl -; SSE-NEXT: movzbl %bl, %ebx -; SSE-NEXT: addq %rbx, %r12 -; SSE-NEXT: movq %rdi, %rcx -; SSE-NEXT: sarq $63, 
%rcx -; SSE-NEXT: testq %r11, %r11 -; SSE-NEXT: setns %r8b +; SSE-NEXT: sarq $63, %rdi +; SSE-NEXT: testb %r8b, %r8b +; SSE-NEXT: cmoveq %rsi, %rdi +; SSE-NEXT: xorl %esi, %esi ; SSE-NEXT: testq %rdx, %rdx -; SSE-NEXT: setns %dl -; SSE-NEXT: cmpb %r8b, %dl -; SSE-NEXT: setne %r8b -; SSE-NEXT: cmpb %bl, %dl -; SSE-NEXT: setne %dl -; SSE-NEXT: testb %dl, %r8b -; SSE-NEXT: cmoveq %rsi, %rcx -; SSE-NEXT: cmoveq %rdi, %r12 -; SSE-NEXT: movq %r15, 24(%rax) -; SSE-NEXT: movq %r10, 16(%rax) -; SSE-NEXT: movq %r12, 8(%rax) -; SSE-NEXT: movq %rcx, (%rax) +; SSE-NEXT: setns %sil +; SSE-NEXT: addq %r11, %rsi +; SSE-NEXT: testb %r8b, %r8b +; SSE-NEXT: cmoveq %rdx, %rsi +; SSE-NEXT: movq %rbx, 16(%rax) +; SSE-NEXT: movq %rdi, (%rax) +; SSE-NEXT: movq %rcx, 24(%rax) +; SSE-NEXT: movq %rsi, 8(%rax) ; SSE-NEXT: popq %rbx -; SSE-NEXT: popq %r12 -; SSE-NEXT: popq %r13 -; SSE-NEXT: popq %r14 -; SSE-NEXT: popq %r15 ; SSE-NEXT: retq ; ; AVX-LABEL: v2i128: ; AVX: # %bb.0: -; AVX-NEXT: pushq %r15 -; AVX-NEXT: pushq %r14 -; AVX-NEXT: pushq %r13 -; AVX-NEXT: pushq %r12 ; AVX-NEXT: pushq %rbx ; AVX-NEXT: movq %rdi, %rax -; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r11 -; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r14 ; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rcx -; AVX-NEXT: movq %r8, %r13 -; AVX-NEXT: sbbq %r14, %r13 -; AVX-NEXT: movq %r13, %r10 -; AVX-NEXT: sarq $63, %r10 -; AVX-NEXT: xorl %edi, %edi -; AVX-NEXT: testq %r13, %r13 -; AVX-NEXT: setns %dil -; AVX-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF -; AVX-NEXT: leaq (%rdi,%r12), %r15 +; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %r8 +; AVX-NEXT: seto %r10b +; AVX-NEXT: movq %r8, %rbx +; AVX-NEXT: sarq $63, %rbx +; AVX-NEXT: testb %r10b, %r10b +; AVX-NEXT: cmoveq %rcx, %rbx +; AVX-NEXT: xorl %ecx, %ecx ; AVX-NEXT: testq %r8, %r8 -; AVX-NEXT: setns %r8b -; AVX-NEXT: cmpb %dil, %r8b -; AVX-NEXT: setne %dil -; AVX-NEXT: testq %r14, %r14 -; AVX-NEXT: setns %bl -; AVX-NEXT: cmpb %bl, %r8b -; AVX-NEXT: setne %bl -; AVX-NEXT: testb %dil, %bl -; 
AVX-NEXT: cmoveq %r13, %r15 -; AVX-NEXT: cmoveq %rcx, %r10 +; AVX-NEXT: setns %cl +; AVX-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF +; AVX-NEXT: addq %r11, %rcx +; AVX-NEXT: testb %r10b, %r10b +; AVX-NEXT: cmoveq %r8, %rcx ; AVX-NEXT: subq %r9, %rsi +; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx +; AVX-NEXT: seto %r8b ; AVX-NEXT: movq %rdx, %rdi -; AVX-NEXT: sbbq %r11, %rdi -; AVX-NEXT: setns %bl -; AVX-NEXT: movzbl %bl, %ebx -; AVX-NEXT: addq %rbx, %r12 -; AVX-NEXT: movq %rdi, %rcx -; AVX-NEXT: sarq $63, %rcx -; AVX-NEXT: testq %r11, %r11 -; AVX-NEXT: setns %r8b +; AVX-NEXT: sarq $63, %rdi +; AVX-NEXT: testb %r8b, %r8b +; AVX-NEXT: cmoveq %rsi, %rdi +; AVX-NEXT: xorl %esi, %esi ; AVX-NEXT: testq %rdx, %rdx -; AVX-NEXT: setns %dl -; AVX-NEXT: cmpb %r8b, %dl -; AVX-NEXT: setne %r8b -; AVX-NEXT: cmpb %bl, %dl -; AVX-NEXT: setne %dl -; AVX-NEXT: testb %dl, %r8b -; AVX-NEXT: cmoveq %rsi, %rcx -; AVX-NEXT: cmoveq %rdi, %r12 -; AVX-NEXT: movq %r15, 24(%rax) -; AVX-NEXT: movq %r10, 16(%rax) -; AVX-NEXT: movq %r12, 8(%rax) -; AVX-NEXT: movq %rcx, (%rax) +; AVX-NEXT: setns %sil +; AVX-NEXT: addq %r11, %rsi +; AVX-NEXT: testb %r8b, %r8b +; AVX-NEXT: cmoveq %rdx, %rsi +; AVX-NEXT: movq %rbx, 16(%rax) +; AVX-NEXT: movq %rdi, (%rax) +; AVX-NEXT: movq %rcx, 24(%rax) +; AVX-NEXT: movq %rsi, 8(%rax) ; AVX-NEXT: popq %rbx -; AVX-NEXT: popq %r12 -; AVX-NEXT: popq %r13 -; AVX-NEXT: popq %r14 -; AVX-NEXT: popq %r15 ; AVX-NEXT: retq %z = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %x, <2 x i128> %y) ret <2 x i128> %z diff --git a/llvm/test/CodeGen/X86/vec_saddo.ll b/llvm/test/CodeGen/X86/vec_saddo.ll --- a/llvm/test/CodeGen/X86/vec_saddo.ll +++ b/llvm/test/CodeGen/X86/vec_saddo.ll @@ -1145,275 +1145,131 @@ define <2 x i32> @saddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) nounwind { ; SSE2-LABEL: saddo_v2i128: ; SSE2: # %bb.0: -; SSE2-NEXT: pushq %rbp -; SSE2-NEXT: pushq %rbx -; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r11 ; SSE2-NEXT: movq 
{{[0-9]+}}(%rsp), %r10 -; SSE2-NEXT: testq %r9, %r9 -; SSE2-NEXT: setns %al -; SSE2-NEXT: testq %rsi, %rsi -; SSE2-NEXT: setns %bl -; SSE2-NEXT: cmpb %al, %bl -; SSE2-NEXT: sete %bpl ; SSE2-NEXT: addq %r8, %rdi ; SSE2-NEXT: adcq %r9, %rsi -; SSE2-NEXT: setns %al -; SSE2-NEXT: cmpb %al, %bl -; SSE2-NEXT: setne %al -; SSE2-NEXT: andb %bpl, %al +; SSE2-NEXT: seto %r8b ; SSE2-NEXT: addq {{[0-9]+}}(%rsp), %rdx -; SSE2-NEXT: movq %rcx, %rbp -; SSE2-NEXT: adcq %r10, %rbp -; SSE2-NEXT: setns %bl -; SSE2-NEXT: testq %rcx, %rcx -; SSE2-NEXT: setns %cl -; SSE2-NEXT: cmpb %bl, %cl -; SSE2-NEXT: setne %r8b -; SSE2-NEXT: testq %r10, %r10 -; SSE2-NEXT: setns %bl -; SSE2-NEXT: cmpb %bl, %cl -; SSE2-NEXT: sete %cl -; SSE2-NEXT: andb %r8b, %cl -; SSE2-NEXT: movzbl %cl, %ecx -; SSE2-NEXT: negl %ecx -; SSE2-NEXT: movd %ecx, %xmm1 +; SSE2-NEXT: adcq {{[0-9]+}}(%rsp), %rcx +; SSE2-NEXT: seto %al ; SSE2-NEXT: movzbl %al, %eax ; SSE2-NEXT: negl %eax +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: movzbl %r8b, %eax +; SSE2-NEXT: negl %eax ; SSE2-NEXT: movd %eax, %xmm0 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: movq %rdx, 16(%r11) -; SSE2-NEXT: movq %rdi, (%r11) -; SSE2-NEXT: movq %rbp, 24(%r11) -; SSE2-NEXT: movq %rsi, 8(%r11) -; SSE2-NEXT: popq %rbx -; SSE2-NEXT: popq %rbp +; SSE2-NEXT: movq %rdx, 16(%r10) +; SSE2-NEXT: movq %rdi, (%r10) +; SSE2-NEXT: movq %rcx, 24(%r10) +; SSE2-NEXT: movq %rsi, 8(%r10) ; SSE2-NEXT: retq ; ; SSSE3-LABEL: saddo_v2i128: ; SSSE3: # %bb.0: -; SSSE3-NEXT: pushq %rbp -; SSSE3-NEXT: pushq %rbx -; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r11 ; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; SSSE3-NEXT: testq %r9, %r9 -; SSSE3-NEXT: setns %al -; SSSE3-NEXT: testq %rsi, %rsi -; SSSE3-NEXT: setns %bl -; SSSE3-NEXT: cmpb %al, %bl -; SSSE3-NEXT: sete %bpl ; SSSE3-NEXT: addq %r8, %rdi ; SSSE3-NEXT: adcq %r9, %rsi -; SSSE3-NEXT: setns %al -; SSSE3-NEXT: cmpb %al, %bl -; SSSE3-NEXT: setne %al -; SSSE3-NEXT: andb %bpl, %al +; SSSE3-NEXT: seto 
%r8b ; SSSE3-NEXT: addq {{[0-9]+}}(%rsp), %rdx -; SSSE3-NEXT: movq %rcx, %rbp -; SSSE3-NEXT: adcq %r10, %rbp -; SSSE3-NEXT: setns %bl -; SSSE3-NEXT: testq %rcx, %rcx -; SSSE3-NEXT: setns %cl -; SSSE3-NEXT: cmpb %bl, %cl -; SSSE3-NEXT: setne %r8b -; SSSE3-NEXT: testq %r10, %r10 -; SSSE3-NEXT: setns %bl -; SSSE3-NEXT: cmpb %bl, %cl -; SSSE3-NEXT: sete %cl -; SSSE3-NEXT: andb %r8b, %cl -; SSSE3-NEXT: movzbl %cl, %ecx -; SSSE3-NEXT: negl %ecx -; SSSE3-NEXT: movd %ecx, %xmm1 +; SSSE3-NEXT: adcq {{[0-9]+}}(%rsp), %rcx +; SSSE3-NEXT: seto %al ; SSSE3-NEXT: movzbl %al, %eax ; SSSE3-NEXT: negl %eax +; SSSE3-NEXT: movd %eax, %xmm1 +; SSSE3-NEXT: movzbl %r8b, %eax +; SSSE3-NEXT: negl %eax ; SSSE3-NEXT: movd %eax, %xmm0 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSSE3-NEXT: movq %rdx, 16(%r11) -; SSSE3-NEXT: movq %rdi, (%r11) -; SSSE3-NEXT: movq %rbp, 24(%r11) -; SSSE3-NEXT: movq %rsi, 8(%r11) -; SSSE3-NEXT: popq %rbx -; SSSE3-NEXT: popq %rbp +; SSSE3-NEXT: movq %rdx, 16(%r10) +; SSSE3-NEXT: movq %rdi, (%r10) +; SSSE3-NEXT: movq %rcx, 24(%r10) +; SSSE3-NEXT: movq %rsi, 8(%r10) ; SSSE3-NEXT: retq ; ; SSE41-LABEL: saddo_v2i128: ; SSE41: # %bb.0: -; SSE41-NEXT: pushq %rbp -; SSE41-NEXT: pushq %rbx -; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r11 ; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; SSE41-NEXT: testq %r9, %r9 -; SSE41-NEXT: setns %al -; SSE41-NEXT: testq %rsi, %rsi -; SSE41-NEXT: setns %bl -; SSE41-NEXT: cmpb %al, %bl -; SSE41-NEXT: sete %bpl ; SSE41-NEXT: addq %r8, %rdi ; SSE41-NEXT: adcq %r9, %rsi -; SSE41-NEXT: setns %al -; SSE41-NEXT: cmpb %al, %bl -; SSE41-NEXT: setne %al -; SSE41-NEXT: andb %bpl, %al +; SSE41-NEXT: seto %r8b ; SSE41-NEXT: addq {{[0-9]+}}(%rsp), %rdx -; SSE41-NEXT: movq %rcx, %rbp -; SSE41-NEXT: adcq %r10, %rbp -; SSE41-NEXT: setns %bl -; SSE41-NEXT: testq %rcx, %rcx -; SSE41-NEXT: setns %cl -; SSE41-NEXT: cmpb %bl, %cl -; SSE41-NEXT: setne %r8b -; SSE41-NEXT: testq %r10, %r10 -; SSE41-NEXT: setns %bl -; SSE41-NEXT: cmpb %bl, 
%cl -; SSE41-NEXT: sete %cl -; SSE41-NEXT: andb %r8b, %cl -; SSE41-NEXT: movzbl %cl, %ecx -; SSE41-NEXT: negl %ecx -; SSE41-NEXT: movzbl %al, %eax +; SSE41-NEXT: adcq {{[0-9]+}}(%rsp), %rcx +; SSE41-NEXT: seto %al +; SSE41-NEXT: movzbl %al, %r9d +; SSE41-NEXT: negl %r9d +; SSE41-NEXT: movzbl %r8b, %eax ; SSE41-NEXT: negl %eax ; SSE41-NEXT: movd %eax, %xmm0 -; SSE41-NEXT: pinsrd $1, %ecx, %xmm0 -; SSE41-NEXT: movq %rdx, 16(%r11) -; SSE41-NEXT: movq %rdi, (%r11) -; SSE41-NEXT: movq %rbp, 24(%r11) -; SSE41-NEXT: movq %rsi, 8(%r11) -; SSE41-NEXT: popq %rbx -; SSE41-NEXT: popq %rbp +; SSE41-NEXT: pinsrd $1, %r9d, %xmm0 +; SSE41-NEXT: movq %rdx, 16(%r10) +; SSE41-NEXT: movq %rdi, (%r10) +; SSE41-NEXT: movq %rcx, 24(%r10) +; SSE41-NEXT: movq %rsi, 8(%r10) ; SSE41-NEXT: retq ; ; AVX1-LABEL: saddo_v2i128: ; AVX1: # %bb.0: -; AVX1-NEXT: pushq %rbp -; AVX1-NEXT: pushq %rbx -; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r11 ; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; AVX1-NEXT: testq %r9, %r9 -; AVX1-NEXT: setns %al -; AVX1-NEXT: testq %rsi, %rsi -; AVX1-NEXT: setns %bl -; AVX1-NEXT: cmpb %al, %bl -; AVX1-NEXT: sete %bpl ; AVX1-NEXT: addq %r8, %rdi ; AVX1-NEXT: adcq %r9, %rsi -; AVX1-NEXT: setns %al -; AVX1-NEXT: cmpb %al, %bl -; AVX1-NEXT: setne %al -; AVX1-NEXT: andb %bpl, %al +; AVX1-NEXT: seto %r8b ; AVX1-NEXT: addq {{[0-9]+}}(%rsp), %rdx -; AVX1-NEXT: movq %rcx, %rbp -; AVX1-NEXT: adcq %r10, %rbp -; AVX1-NEXT: setns %bl -; AVX1-NEXT: testq %rcx, %rcx -; AVX1-NEXT: setns %cl -; AVX1-NEXT: cmpb %bl, %cl -; AVX1-NEXT: setne %r8b -; AVX1-NEXT: testq %r10, %r10 -; AVX1-NEXT: setns %bl -; AVX1-NEXT: cmpb %bl, %cl -; AVX1-NEXT: sete %cl -; AVX1-NEXT: andb %r8b, %cl -; AVX1-NEXT: movzbl %cl, %ecx -; AVX1-NEXT: negl %ecx -; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: adcq {{[0-9]+}}(%rsp), %rcx +; AVX1-NEXT: seto %al +; AVX1-NEXT: movzbl %al, %r9d +; AVX1-NEXT: negl %r9d +; AVX1-NEXT: movzbl %r8b, %eax ; AVX1-NEXT: negl %eax ; AVX1-NEXT: vmovd %eax, %xmm0 -; AVX1-NEXT: vpinsrd $1, %ecx, 
%xmm0, %xmm0 -; AVX1-NEXT: movq %rdx, 16(%r11) -; AVX1-NEXT: movq %rdi, (%r11) -; AVX1-NEXT: movq %rbp, 24(%r11) -; AVX1-NEXT: movq %rsi, 8(%r11) -; AVX1-NEXT: popq %rbx -; AVX1-NEXT: popq %rbp +; AVX1-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0 +; AVX1-NEXT: movq %rdx, 16(%r10) +; AVX1-NEXT: movq %rdi, (%r10) +; AVX1-NEXT: movq %rcx, 24(%r10) +; AVX1-NEXT: movq %rsi, 8(%r10) ; AVX1-NEXT: retq ; ; AVX2-LABEL: saddo_v2i128: ; AVX2: # %bb.0: -; AVX2-NEXT: pushq %rbp -; AVX2-NEXT: pushq %rbx -; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11 ; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; AVX2-NEXT: testq %r9, %r9 -; AVX2-NEXT: setns %al -; AVX2-NEXT: testq %rsi, %rsi -; AVX2-NEXT: setns %bl -; AVX2-NEXT: cmpb %al, %bl -; AVX2-NEXT: sete %bpl ; AVX2-NEXT: addq %r8, %rdi ; AVX2-NEXT: adcq %r9, %rsi -; AVX2-NEXT: setns %al -; AVX2-NEXT: cmpb %al, %bl -; AVX2-NEXT: setne %al -; AVX2-NEXT: andb %bpl, %al +; AVX2-NEXT: seto %r8b ; AVX2-NEXT: addq {{[0-9]+}}(%rsp), %rdx -; AVX2-NEXT: movq %rcx, %rbp -; AVX2-NEXT: adcq %r10, %rbp -; AVX2-NEXT: setns %bl -; AVX2-NEXT: testq %rcx, %rcx -; AVX2-NEXT: setns %cl -; AVX2-NEXT: cmpb %bl, %cl -; AVX2-NEXT: setne %r8b -; AVX2-NEXT: testq %r10, %r10 -; AVX2-NEXT: setns %bl -; AVX2-NEXT: cmpb %bl, %cl -; AVX2-NEXT: sete %cl -; AVX2-NEXT: andb %r8b, %cl -; AVX2-NEXT: movzbl %cl, %ecx -; AVX2-NEXT: negl %ecx -; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: adcq {{[0-9]+}}(%rsp), %rcx +; AVX2-NEXT: seto %al +; AVX2-NEXT: movzbl %al, %r9d +; AVX2-NEXT: negl %r9d +; AVX2-NEXT: movzbl %r8b, %eax ; AVX2-NEXT: negl %eax ; AVX2-NEXT: vmovd %eax, %xmm0 -; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 -; AVX2-NEXT: movq %rdx, 16(%r11) -; AVX2-NEXT: movq %rdi, (%r11) -; AVX2-NEXT: movq %rbp, 24(%r11) -; AVX2-NEXT: movq %rsi, 8(%r11) -; AVX2-NEXT: popq %rbx -; AVX2-NEXT: popq %rbp +; AVX2-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0 +; AVX2-NEXT: movq %rdx, 16(%r10) +; AVX2-NEXT: movq %rdi, (%r10) +; AVX2-NEXT: movq %rcx, 24(%r10) +; AVX2-NEXT: movq %rsi, 8(%r10) ; AVX2-NEXT: retq ; 
; AVX512-LABEL: saddo_v2i128: ; AVX512: # %bb.0: -; AVX512-NEXT: pushq %r14 -; AVX512-NEXT: pushq %rbx ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11 ; AVX512-NEXT: addq {{[0-9]+}}(%rsp), %rdx -; AVX512-NEXT: movq %rcx, %r14 -; AVX512-NEXT: adcq %r11, %r14 -; AVX512-NEXT: setns %bl -; AVX512-NEXT: testq %rcx, %rcx -; AVX512-NEXT: setns %cl -; AVX512-NEXT: cmpb %bl, %cl -; AVX512-NEXT: setne %bl -; AVX512-NEXT: testq %r11, %r11 -; AVX512-NEXT: setns %al -; AVX512-NEXT: cmpb %al, %cl -; AVX512-NEXT: sete %al -; AVX512-NEXT: andb %bl, %al +; AVX512-NEXT: adcq {{[0-9]+}}(%rsp), %rcx +; AVX512-NEXT: seto %al ; AVX512-NEXT: kmovd %eax, %k0 -; AVX512-NEXT: testq %r9, %r9 -; AVX512-NEXT: setns %al -; AVX512-NEXT: testq %rsi, %rsi -; AVX512-NEXT: setns %cl -; AVX512-NEXT: cmpb %al, %cl -; AVX512-NEXT: sete %al ; AVX512-NEXT: addq %r8, %rdi ; AVX512-NEXT: adcq %r9, %rsi -; AVX512-NEXT: setns %bl -; AVX512-NEXT: cmpb %bl, %cl -; AVX512-NEXT: setne %cl -; AVX512-NEXT: andb %al, %cl -; AVX512-NEXT: andl $1, %ecx -; AVX512-NEXT: kmovw %ecx, %k1 +; AVX512-NEXT: seto %al +; AVX512-NEXT: andl $1, %eax +; AVX512-NEXT: kmovw %eax, %k1 ; AVX512-NEXT: kshiftlw $1, %k0, %k0 ; AVX512-NEXT: korw %k0, %k1, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512-NEXT: movq %rdx, 16(%r10) ; AVX512-NEXT: movq %rdi, (%r10) -; AVX512-NEXT: movq %r14, 24(%r10) +; AVX512-NEXT: movq %rcx, 24(%r10) ; AVX512-NEXT: movq %rsi, 8(%r10) -; AVX512-NEXT: popq %rbx -; AVX512-NEXT: popq %r14 ; AVX512-NEXT: retq %t = call {<2 x i128>, <2 x i1>} @llvm.sadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1) %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0 diff --git a/llvm/test/CodeGen/X86/vec_ssubo.ll b/llvm/test/CodeGen/X86/vec_ssubo.ll --- a/llvm/test/CodeGen/X86/vec_ssubo.ll +++ b/llvm/test/CodeGen/X86/vec_ssubo.ll @@ -1154,275 +1154,131 @@ define <2 x i32> @ssubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* 
%p2) nounwind { ; SSE2-LABEL: ssubo_v2i128: ; SSE2: # %bb.0: -; SSE2-NEXT: pushq %rbp -; SSE2-NEXT: pushq %rbx -; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r11 ; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; SSE2-NEXT: testq %r9, %r9 -; SSE2-NEXT: setns %al -; SSE2-NEXT: testq %rsi, %rsi -; SSE2-NEXT: setns %bl -; SSE2-NEXT: cmpb %al, %bl -; SSE2-NEXT: setne %bpl ; SSE2-NEXT: subq %r8, %rdi ; SSE2-NEXT: sbbq %r9, %rsi -; SSE2-NEXT: setns %al -; SSE2-NEXT: cmpb %al, %bl -; SSE2-NEXT: setne %al -; SSE2-NEXT: andb %bpl, %al +; SSE2-NEXT: seto %r8b ; SSE2-NEXT: subq {{[0-9]+}}(%rsp), %rdx -; SSE2-NEXT: movq %rcx, %rbp -; SSE2-NEXT: sbbq %r10, %rbp -; SSE2-NEXT: setns %bl -; SSE2-NEXT: testq %rcx, %rcx -; SSE2-NEXT: setns %cl -; SSE2-NEXT: cmpb %bl, %cl -; SSE2-NEXT: setne %r8b -; SSE2-NEXT: testq %r10, %r10 -; SSE2-NEXT: setns %bl -; SSE2-NEXT: cmpb %bl, %cl -; SSE2-NEXT: setne %cl -; SSE2-NEXT: andb %r8b, %cl -; SSE2-NEXT: movzbl %cl, %ecx -; SSE2-NEXT: negl %ecx -; SSE2-NEXT: movd %ecx, %xmm1 +; SSE2-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx +; SSE2-NEXT: seto %al ; SSE2-NEXT: movzbl %al, %eax ; SSE2-NEXT: negl %eax +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: movzbl %r8b, %eax +; SSE2-NEXT: negl %eax ; SSE2-NEXT: movd %eax, %xmm0 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: movq %rdx, 16(%r11) -; SSE2-NEXT: movq %rdi, (%r11) -; SSE2-NEXT: movq %rbp, 24(%r11) -; SSE2-NEXT: movq %rsi, 8(%r11) -; SSE2-NEXT: popq %rbx -; SSE2-NEXT: popq %rbp +; SSE2-NEXT: movq %rdx, 16(%r10) +; SSE2-NEXT: movq %rdi, (%r10) +; SSE2-NEXT: movq %rcx, 24(%r10) +; SSE2-NEXT: movq %rsi, 8(%r10) ; SSE2-NEXT: retq ; ; SSSE3-LABEL: ssubo_v2i128: ; SSSE3: # %bb.0: -; SSSE3-NEXT: pushq %rbp -; SSSE3-NEXT: pushq %rbx -; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r11 ; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; SSSE3-NEXT: testq %r9, %r9 -; SSSE3-NEXT: setns %al -; SSSE3-NEXT: testq %rsi, %rsi -; SSSE3-NEXT: setns %bl -; SSSE3-NEXT: cmpb %al, %bl -; SSSE3-NEXT: setne %bpl ; SSSE3-NEXT: 
subq %r8, %rdi ; SSSE3-NEXT: sbbq %r9, %rsi -; SSSE3-NEXT: setns %al -; SSSE3-NEXT: cmpb %al, %bl -; SSSE3-NEXT: setne %al -; SSSE3-NEXT: andb %bpl, %al +; SSSE3-NEXT: seto %r8b ; SSSE3-NEXT: subq {{[0-9]+}}(%rsp), %rdx -; SSSE3-NEXT: movq %rcx, %rbp -; SSSE3-NEXT: sbbq %r10, %rbp -; SSSE3-NEXT: setns %bl -; SSSE3-NEXT: testq %rcx, %rcx -; SSSE3-NEXT: setns %cl -; SSSE3-NEXT: cmpb %bl, %cl -; SSSE3-NEXT: setne %r8b -; SSSE3-NEXT: testq %r10, %r10 -; SSSE3-NEXT: setns %bl -; SSSE3-NEXT: cmpb %bl, %cl -; SSSE3-NEXT: setne %cl -; SSSE3-NEXT: andb %r8b, %cl -; SSSE3-NEXT: movzbl %cl, %ecx -; SSSE3-NEXT: negl %ecx -; SSSE3-NEXT: movd %ecx, %xmm1 +; SSSE3-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx +; SSSE3-NEXT: seto %al ; SSSE3-NEXT: movzbl %al, %eax ; SSSE3-NEXT: negl %eax +; SSSE3-NEXT: movd %eax, %xmm1 +; SSSE3-NEXT: movzbl %r8b, %eax +; SSSE3-NEXT: negl %eax ; SSSE3-NEXT: movd %eax, %xmm0 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSSE3-NEXT: movq %rdx, 16(%r11) -; SSSE3-NEXT: movq %rdi, (%r11) -; SSSE3-NEXT: movq %rbp, 24(%r11) -; SSSE3-NEXT: movq %rsi, 8(%r11) -; SSSE3-NEXT: popq %rbx -; SSSE3-NEXT: popq %rbp +; SSSE3-NEXT: movq %rdx, 16(%r10) +; SSSE3-NEXT: movq %rdi, (%r10) +; SSSE3-NEXT: movq %rcx, 24(%r10) +; SSSE3-NEXT: movq %rsi, 8(%r10) ; SSSE3-NEXT: retq ; ; SSE41-LABEL: ssubo_v2i128: ; SSE41: # %bb.0: -; SSE41-NEXT: pushq %rbp -; SSE41-NEXT: pushq %rbx -; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r11 ; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; SSE41-NEXT: testq %r9, %r9 -; SSE41-NEXT: setns %al -; SSE41-NEXT: testq %rsi, %rsi -; SSE41-NEXT: setns %bl -; SSE41-NEXT: cmpb %al, %bl -; SSE41-NEXT: setne %bpl ; SSE41-NEXT: subq %r8, %rdi ; SSE41-NEXT: sbbq %r9, %rsi -; SSE41-NEXT: setns %al -; SSE41-NEXT: cmpb %al, %bl -; SSE41-NEXT: setne %al -; SSE41-NEXT: andb %bpl, %al +; SSE41-NEXT: seto %r8b ; SSE41-NEXT: subq {{[0-9]+}}(%rsp), %rdx -; SSE41-NEXT: movq %rcx, %rbp -; SSE41-NEXT: sbbq %r10, %rbp -; SSE41-NEXT: setns %bl -; SSE41-NEXT: 
testq %rcx, %rcx -; SSE41-NEXT: setns %cl -; SSE41-NEXT: cmpb %bl, %cl -; SSE41-NEXT: setne %r8b -; SSE41-NEXT: testq %r10, %r10 -; SSE41-NEXT: setns %bl -; SSE41-NEXT: cmpb %bl, %cl -; SSE41-NEXT: setne %cl -; SSE41-NEXT: andb %r8b, %cl -; SSE41-NEXT: movzbl %cl, %ecx -; SSE41-NEXT: negl %ecx -; SSE41-NEXT: movzbl %al, %eax +; SSE41-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx +; SSE41-NEXT: seto %al +; SSE41-NEXT: movzbl %al, %r9d +; SSE41-NEXT: negl %r9d +; SSE41-NEXT: movzbl %r8b, %eax ; SSE41-NEXT: negl %eax ; SSE41-NEXT: movd %eax, %xmm0 -; SSE41-NEXT: pinsrd $1, %ecx, %xmm0 -; SSE41-NEXT: movq %rdx, 16(%r11) -; SSE41-NEXT: movq %rdi, (%r11) -; SSE41-NEXT: movq %rbp, 24(%r11) -; SSE41-NEXT: movq %rsi, 8(%r11) -; SSE41-NEXT: popq %rbx -; SSE41-NEXT: popq %rbp +; SSE41-NEXT: pinsrd $1, %r9d, %xmm0 +; SSE41-NEXT: movq %rdx, 16(%r10) +; SSE41-NEXT: movq %rdi, (%r10) +; SSE41-NEXT: movq %rcx, 24(%r10) +; SSE41-NEXT: movq %rsi, 8(%r10) ; SSE41-NEXT: retq ; ; AVX1-LABEL: ssubo_v2i128: ; AVX1: # %bb.0: -; AVX1-NEXT: pushq %rbp -; AVX1-NEXT: pushq %rbx -; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r11 ; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; AVX1-NEXT: testq %r9, %r9 -; AVX1-NEXT: setns %al -; AVX1-NEXT: testq %rsi, %rsi -; AVX1-NEXT: setns %bl -; AVX1-NEXT: cmpb %al, %bl -; AVX1-NEXT: setne %bpl ; AVX1-NEXT: subq %r8, %rdi ; AVX1-NEXT: sbbq %r9, %rsi -; AVX1-NEXT: setns %al -; AVX1-NEXT: cmpb %al, %bl -; AVX1-NEXT: setne %al -; AVX1-NEXT: andb %bpl, %al +; AVX1-NEXT: seto %r8b ; AVX1-NEXT: subq {{[0-9]+}}(%rsp), %rdx -; AVX1-NEXT: movq %rcx, %rbp -; AVX1-NEXT: sbbq %r10, %rbp -; AVX1-NEXT: setns %bl -; AVX1-NEXT: testq %rcx, %rcx -; AVX1-NEXT: setns %cl -; AVX1-NEXT: cmpb %bl, %cl -; AVX1-NEXT: setne %r8b -; AVX1-NEXT: testq %r10, %r10 -; AVX1-NEXT: setns %bl -; AVX1-NEXT: cmpb %bl, %cl -; AVX1-NEXT: setne %cl -; AVX1-NEXT: andb %r8b, %cl -; AVX1-NEXT: movzbl %cl, %ecx -; AVX1-NEXT: negl %ecx -; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx +; AVX1-NEXT: seto 
%al +; AVX1-NEXT: movzbl %al, %r9d +; AVX1-NEXT: negl %r9d +; AVX1-NEXT: movzbl %r8b, %eax ; AVX1-NEXT: negl %eax ; AVX1-NEXT: vmovd %eax, %xmm0 -; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 -; AVX1-NEXT: movq %rdx, 16(%r11) -; AVX1-NEXT: movq %rdi, (%r11) -; AVX1-NEXT: movq %rbp, 24(%r11) -; AVX1-NEXT: movq %rsi, 8(%r11) -; AVX1-NEXT: popq %rbx -; AVX1-NEXT: popq %rbp +; AVX1-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0 +; AVX1-NEXT: movq %rdx, 16(%r10) +; AVX1-NEXT: movq %rdi, (%r10) +; AVX1-NEXT: movq %rcx, 24(%r10) +; AVX1-NEXT: movq %rsi, 8(%r10) ; AVX1-NEXT: retq ; ; AVX2-LABEL: ssubo_v2i128: ; AVX2: # %bb.0: -; AVX2-NEXT: pushq %rbp -; AVX2-NEXT: pushq %rbx -; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11 ; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; AVX2-NEXT: testq %r9, %r9 -; AVX2-NEXT: setns %al -; AVX2-NEXT: testq %rsi, %rsi -; AVX2-NEXT: setns %bl -; AVX2-NEXT: cmpb %al, %bl -; AVX2-NEXT: setne %bpl ; AVX2-NEXT: subq %r8, %rdi ; AVX2-NEXT: sbbq %r9, %rsi -; AVX2-NEXT: setns %al -; AVX2-NEXT: cmpb %al, %bl -; AVX2-NEXT: setne %al -; AVX2-NEXT: andb %bpl, %al +; AVX2-NEXT: seto %r8b ; AVX2-NEXT: subq {{[0-9]+}}(%rsp), %rdx -; AVX2-NEXT: movq %rcx, %rbp -; AVX2-NEXT: sbbq %r10, %rbp -; AVX2-NEXT: setns %bl -; AVX2-NEXT: testq %rcx, %rcx -; AVX2-NEXT: setns %cl -; AVX2-NEXT: cmpb %bl, %cl -; AVX2-NEXT: setne %r8b -; AVX2-NEXT: testq %r10, %r10 -; AVX2-NEXT: setns %bl -; AVX2-NEXT: cmpb %bl, %cl -; AVX2-NEXT: setne %cl -; AVX2-NEXT: andb %r8b, %cl -; AVX2-NEXT: movzbl %cl, %ecx -; AVX2-NEXT: negl %ecx -; AVX2-NEXT: movzbl %al, %eax +; AVX2-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx +; AVX2-NEXT: seto %al +; AVX2-NEXT: movzbl %al, %r9d +; AVX2-NEXT: negl %r9d +; AVX2-NEXT: movzbl %r8b, %eax ; AVX2-NEXT: negl %eax ; AVX2-NEXT: vmovd %eax, %xmm0 -; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 -; AVX2-NEXT: movq %rdx, 16(%r11) -; AVX2-NEXT: movq %rdi, (%r11) -; AVX2-NEXT: movq %rbp, 24(%r11) -; AVX2-NEXT: movq %rsi, 8(%r11) -; AVX2-NEXT: popq %rbx -; AVX2-NEXT: popq %rbp +; AVX2-NEXT: 
vpinsrd $1, %r9d, %xmm0, %xmm0 +; AVX2-NEXT: movq %rdx, 16(%r10) +; AVX2-NEXT: movq %rdi, (%r10) +; AVX2-NEXT: movq %rcx, 24(%r10) +; AVX2-NEXT: movq %rsi, 8(%r10) ; AVX2-NEXT: retq ; ; AVX512-LABEL: ssubo_v2i128: ; AVX512: # %bb.0: -; AVX512-NEXT: pushq %r14 -; AVX512-NEXT: pushq %rbx ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11 ; AVX512-NEXT: subq {{[0-9]+}}(%rsp), %rdx -; AVX512-NEXT: movq %rcx, %r14 -; AVX512-NEXT: sbbq %r11, %r14 -; AVX512-NEXT: setns %bl -; AVX512-NEXT: testq %rcx, %rcx -; AVX512-NEXT: setns %cl -; AVX512-NEXT: cmpb %bl, %cl -; AVX512-NEXT: setne %bl -; AVX512-NEXT: testq %r11, %r11 -; AVX512-NEXT: setns %al -; AVX512-NEXT: cmpb %al, %cl -; AVX512-NEXT: setne %al -; AVX512-NEXT: andb %bl, %al +; AVX512-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx +; AVX512-NEXT: seto %al ; AVX512-NEXT: kmovd %eax, %k0 -; AVX512-NEXT: testq %r9, %r9 -; AVX512-NEXT: setns %al -; AVX512-NEXT: testq %rsi, %rsi -; AVX512-NEXT: setns %cl -; AVX512-NEXT: cmpb %al, %cl -; AVX512-NEXT: setne %al ; AVX512-NEXT: subq %r8, %rdi ; AVX512-NEXT: sbbq %r9, %rsi -; AVX512-NEXT: setns %bl -; AVX512-NEXT: cmpb %bl, %cl -; AVX512-NEXT: setne %cl -; AVX512-NEXT: andb %al, %cl -; AVX512-NEXT: andl $1, %ecx -; AVX512-NEXT: kmovw %ecx, %k1 +; AVX512-NEXT: seto %al +; AVX512-NEXT: andl $1, %eax +; AVX512-NEXT: kmovw %eax, %k1 ; AVX512-NEXT: kshiftlw $1, %k0, %k0 ; AVX512-NEXT: korw %k0, %k1, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512-NEXT: movq %rdx, 16(%r10) ; AVX512-NEXT: movq %rdi, (%r10) -; AVX512-NEXT: movq %r14, 24(%r10) +; AVX512-NEXT: movq %rcx, 24(%r10) ; AVX512-NEXT: movq %rsi, 8(%r10) -; AVX512-NEXT: popq %rbx -; AVX512-NEXT: popq %r14 ; AVX512-NEXT: retq %t = call {<2 x i128>, <2 x i1>} @llvm.ssub.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1) %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0 diff --git a/llvm/test/CodeGen/X86/xaluo128.ll b/llvm/test/CodeGen/X86/xaluo128.ll 
--- a/llvm/test/CodeGen/X86/xaluo128.ll +++ b/llvm/test/CodeGen/X86/xaluo128.ll @@ -5,55 +5,35 @@ define zeroext i1 @saddoi128(i128 %v1, i128 %v2, i128* %res) nounwind { ; X64-LABEL: saddoi128: ; X64: ## %bb.0: -; X64-NEXT: testq %rcx, %rcx -; X64-NEXT: setns %r9b -; X64-NEXT: testq %rsi, %rsi -; X64-NEXT: setns %al -; X64-NEXT: cmpb %r9b, %al -; X64-NEXT: sete %r9b ; X64-NEXT: addq %rdx, %rdi ; X64-NEXT: adcq %rcx, %rsi -; X64-NEXT: setns %cl -; X64-NEXT: cmpb %cl, %al -; X64-NEXT: setne %al -; X64-NEXT: andb %r9b, %al +; X64-NEXT: seto %al ; X64-NEXT: movq %rdi, (%r8) ; X64-NEXT: movq %rsi, 8(%r8) ; X64-NEXT: retq ; ; X86-LABEL: saddoi128: ; X86: ## %bb.0: -; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X86-NEXT: setns %al -; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: setns %ah -; X86-NEXT: cmpb %al, %ah -; X86-NEXT: sete %cl -; X86-NEXT: addl {{[0-9]+}}(%esp), %esi -; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi -; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx +; X86-NEXT: addl {{[0-9]+}}(%esp), %edi ; X86-NEXT: adcl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: setns %al -; X86-NEXT: cmpb %al, %ah -; X86-NEXT: setne %al -; X86-NEXT: andb %cl, %al -; X86-NEXT: movl %esi, (%ebp) -; X86-NEXT: movl %edi, 4(%ebp) -; X86-NEXT: movl %edx, 8(%ebp) -; X86-NEXT: movl %ebx, 12(%ebp) +; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi +; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx +; X86-NEXT: seto %al +; X86-NEXT: movl %edi, (%ecx) +; X86-NEXT: movl %ebx, 4(%ecx) +; X86-NEXT: movl %esi, 8(%ecx) +; X86-NEXT: movl %edx, 12(%ecx) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp ; X86-NEXT: retl %t = call {i128, i1} @llvm.sadd.with.overflow.i128(i128 %v1, 
i128 %v2) %val = extractvalue {i128, i1} %t, 0 @@ -106,55 +86,35 @@ define zeroext i1 @ssuboi128(i128 %v1, i128 %v2, i128* %res) nounwind { ; X64-LABEL: ssuboi128: ; X64: ## %bb.0: -; X64-NEXT: testq %rcx, %rcx -; X64-NEXT: setns %r9b -; X64-NEXT: testq %rsi, %rsi -; X64-NEXT: setns %al -; X64-NEXT: cmpb %r9b, %al -; X64-NEXT: setne %r9b ; X64-NEXT: subq %rdx, %rdi ; X64-NEXT: sbbq %rcx, %rsi -; X64-NEXT: setns %cl -; X64-NEXT: cmpb %cl, %al -; X64-NEXT: setne %al -; X64-NEXT: andb %r9b, %al +; X64-NEXT: seto %al ; X64-NEXT: movq %rdi, (%r8) ; X64-NEXT: movq %rsi, 8(%r8) ; X64-NEXT: retq ; ; X86-LABEL: ssuboi128: ; X86: ## %bb.0: -; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X86-NEXT: setns %al -; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: setns %ah -; X86-NEXT: cmpb %al, %ah -; X86-NEXT: setne %cl -; X86-NEXT: subl {{[0-9]+}}(%esp), %esi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: subl {{[0-9]+}}(%esp), %edi ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: setns %al -; X86-NEXT: cmpb %al, %ah -; X86-NEXT: setne %al -; X86-NEXT: andb %cl, %al -; X86-NEXT: movl %esi, (%ebp) -; X86-NEXT: movl %edi, 4(%ebp) -; X86-NEXT: movl %edx, 8(%ebp) -; X86-NEXT: movl %ebx, 12(%ebp) +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: seto %al +; X86-NEXT: movl %edi, (%ecx) +; X86-NEXT: movl %ebx, 4(%ecx) +; X86-NEXT: movl %esi, 8(%ecx) +; X86-NEXT: movl %edx, 12(%ecx) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp ; X86-NEXT: retl %t = call {i128, i1} @llvm.ssub.with.overflow.i128(i128 %v1, i128 %v2) %val = extractvalue 
{i128, i1} %t, 0