// AArch64: fold "not" of an overflow-intrinsic flag into the condition code.
//
// lib/Target/AArch64/AArch64ISelLowering.cpp
//   * Adds the file-local helper isOverflowIntrOpRes(), which returns true
//     when an SDValue is result number 1 of an ISD::{S,U}{ADD,SUB,MUL}O node.
//   * LowerXOR: when the second operand is the constant one and the first is
//     such an overflow result (and the node's value type 0 is legal), emits
//     AArch64ISD::CSEL(1, 0, inverted cc, flags) using the condition code and
//     flags value returned by getAArch64XALUOOp(). If the type is not legal it
//     returns an empty SDValue.
//   * `SDLoc dl(Sel)` is hoisted above the new code; the later declaration is
//     removed. NOTE(review): dl is now taken before the std::swap(Sel, Other)
//     shown in the context lines -- confirm the debug location is still the
//     intended one on the SELECT_CC path.
//   * The two existing open-coded opcode checks (the "feeding into a branch"
//     and "feeding into a select" sites) now call isOverflowIntrOpRes().
//
// test/CodeGen/AArch64/arm64-xaluo.ll
//   * Adds twelve tests (i32/i64 for sadd, uadd, ssub, usub, smul, umul) that
//     xor the overflow bit with true and CHECK for a single cset with the
//     inverted condition.
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1972,10 +1972,41 @@
   return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
 }
 
+// Returns true if the given Op is the overflow flag result of an overflow
+// intrinsic operation.
+static bool isOverflowIntrOpRes(SDValue Op) {
+  unsigned Opc = Op.getOpcode();
+  return (Op.getResNo() == 1 &&
+          (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+           Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
+}
+
 static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
   SDValue Sel = Op.getOperand(0);
   SDValue Other = Op.getOperand(1);
+  SDLoc dl(Sel);
 
+  // If the operand is an overflow checking operation, invert the condition
+  // code and kill the Not operation. I.e., transform:
+  // (xor (overflow_op_bool, 1))
+  //   -->
+  // (csel 1, 0, invert(cc), overflow_op_bool)
+  // ... which later gets transformed to just a cset instruction with an
+  // inverted condition code, rather than a cset + eor sequence.
+  if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) {
+    // Only lower legal XALUO ops.
+    if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
+      return SDValue();
+
+    SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
+    SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
+    AArch64CC::CondCode CC;
+    SDValue Value, Overflow;
+    std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
+    SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
+    return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
+                       CCVal, Overflow);
+  }
   // If neither operand is a SELECT_CC, give up.
   if (Sel.getOpcode() != ISD::SELECT_CC)
     std::swap(Sel, Other);
@@ -1994,7 +2025,6 @@
   SDValue RHS = Sel.getOperand(1);
   SDValue TVal = Sel.getOperand(2);
   SDValue FVal = Sel.getOperand(3);
-  SDLoc dl(Sel);
 
   // FIXME: This could be generalized to non-integer comparisons.
   if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
@@ -3958,10 +3988,7 @@
 
   // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
   // instruction.
-  unsigned Opc = LHS.getOpcode();
-  if (LHS.getResNo() == 1 && isOneConstant(RHS) &&
-      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
-       Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
+  if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS)) {
     assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
            "Unexpected condition code.");
     // Only lower legal XALUO ops.
@@ -4453,12 +4480,9 @@
   SDValue FVal = Op->getOperand(2);
   SDLoc DL(Op);
 
-  unsigned Opc = CCVal.getOpcode();
   // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
   // instruction.
-  if (CCVal.getResNo() == 1 &&
-      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
-       Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
+  if (isOverflowIntrOpRes(CCVal)) {
     // Only lower legal XALUO ops.
     if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
       return SDValue();
Index: test/CodeGen/AArch64/arm64-xaluo.ll
===================================================================
--- test/CodeGen/AArch64/arm64-xaluo.ll
+++ test/CodeGen/AArch64/arm64-xaluo.ll
@@ -656,6 +656,146 @@
   ret i1 true
 }
 
+; Check cc optimization of not(overflow)
+
+define i1 @saddo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: saddo.not.i32
+; CHECK: cmn w0, w1
+; CHECK-NEXT: cset w0, vc
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @saddo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: saddo.not.i64
+; CHECK: cmn x0, x1
+; CHECK-NEXT: cset w0, vc
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @uaddo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: uaddo.not.i32
+; CHECK: cmn w0, w1
+; CHECK-NEXT: cset w0, lo
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @uaddo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: uaddo.not.i64
+; CHECK: cmn x0, x1
+; CHECK-NEXT: cset w0, lo
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @ssubo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: ssubo.not.i32
+; CHECK: cmp w0, w1
+; CHECK-NEXT: cset w0, vc
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @ssubo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: ssubo.not.i64
+; CHECK: cmp x0, x1
+; CHECK-NEXT: cset w0, vc
+  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @usubo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: usubo.not.i32
+; CHECK: cmp w0, w1
+; CHECK-NEXT: cset w0, hs
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @usubo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: usubo.not.i64
+; CHECK: cmp x0, x1
+; CHECK-NEXT: cset w0, hs
+  %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @smulo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: smulo.not.i32
+; CHECK: smull x[[MREG:[0-9]+]], w0, w1
+; CHECK-NEXT: lsr x[[SREG:[0-9]+]], x[[MREG]], #32
+; CHECK-NEXT: cmp w[[SREG]], w[[MREG]], asr #31
+; CHECK-NEXT: cset w0, eq
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: smulo.not.i64
+; CHECK: mul [[MREG:x[0-9]+]], x0, x1
+; CHECK-NEXT: smulh [[HREG:x[0-9]+]], x0, x1
+; CHECK-NEXT: cmp [[HREG]], [[MREG]], asr #63
+; CHECK-NEXT: cset w0, eq
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @umulo.not.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: umulo.not.i32
+; CHECK: umull [[MREG:x[0-9]+]], w0, w1
+; CHECK-NEXT: cmp xzr, [[MREG]], lsr #32
+; CHECK-NEXT: cset w0, eq
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+  %obit = extractvalue {i32, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
+define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: umulo.not.i64
+; CHECK: umulh [[MREG:x[0-9]+]], x0, x1
+; CHECK-NEXT: cmp xzr, [[MREG]]
+; CHECK-NEXT: cset w0, eq
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+  %obit = extractvalue {i64, i1} %t, 1
+  %ret = xor i1 %obit, true
+  ret i1 %ret
+}
+
 declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
 declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
 declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone