diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -691,6 +691,10 @@ return true; } + /// Return true if it is more efficient to detect signed add/sub overflow for + /// \p VT from the sign bits of the operands and the result. + virtual bool useSignBitsToDeduceOverflow(EVT VT) const { return false; } + /// Use bitwise logic to make pairs of compares more efficient. For example: /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0 /// This should be true when it takes more than one instruction to lower diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -10082,6 +10082,27 @@ SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); + if (useSignBitsToDeduceOverflow(LHS.getValueType())) { + SDValue Cond1, Cond2; + if (IsAdd) { + // For an addition, overflow occurs iff the sign of the result differs + // from the signs of both operands. + Cond1 = DAG.getNode(ISD::XOR, dl, LHS.getValueType(), LHS, Result); + Cond2 = DAG.getNode(ISD::XOR, dl, LHS.getValueType(), RHS, Result); + } else { + // For a subtraction, overflow occurs iff the operands have different + // signs and the sign of the result differs from the sign of LHS. + Cond1 = DAG.getNode(ISD::XOR, dl, LHS.getValueType(), LHS, RHS); + Cond2 = DAG.getNode(ISD::XOR, dl, LHS.getValueType(), Result, LHS); + } + SDValue FirstBitOverflow = + DAG.getNode(ISD::AND, dl, LHS.getValueType(), Cond1, Cond2); + Overflow = DAG.getBoolExtOrTrunc( + DAG.getSetCC(dl, OType, FirstBitOverflow, Zero, ISD::SETLT), dl, + ResultType, ResultType); + return; + } + // For an addition, the result should be less than one of the operands (LHS) // if and only if the other operand (RHS) is negative, otherwise there will // be overflow. 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -800,6 +800,10 @@ bool preferIncOfAddToSubOfNot(EVT VT) const override; + bool useSignBitsToDeduceOverflow(EVT VT) const override { + return VT.isScalarInteger(); + } + bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { return VT.isScalarInteger(); } diff --git a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll --- a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll +++ b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll @@ -50,11 +50,10 @@ ; CHECK-LABEL: test_saddo_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: add 5, 3, 4 -; CHECK-NEXT: cmpwi 1, 4, 0 -; CHECK-NEXT: cmpw 5, 3 -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: creqv 20, 4, 0 -; CHECK-NEXT: isel 3, 0, 3, 20 +; CHECK-NEXT: xor 4, 4, 5 +; CHECK-NEXT: xor 3, 3, 5 +; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: srwi 3, 3, 31 ; CHECK-NEXT: blr entry: %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind @@ -66,11 +65,10 @@ ; CHECK-LABEL: test_saddo_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: add 5, 3, 4 -; CHECK-NEXT: cmpdi 1, 4, 0 -; CHECK-NEXT: cmpd 5, 3 -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: creqv 20, 4, 0 -; CHECK-NEXT: isel 3, 0, 3, 20 +; CHECK-NEXT: xor 4, 4, 5 +; CHECK-NEXT: xor 3, 3, 5 +; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 1, 63 ; CHECK-NEXT: blr entry: %res = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind @@ -130,11 +128,10 @@ ; CHECK-LABEL: test_ssubo_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: sub 5, 3, 4 -; CHECK-NEXT: cmpwi 1, 4, 0 -; CHECK-NEXT: cmpw 5, 3 -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: creqv 20, 5, 0 -; CHECK-NEXT: isel 3, 0, 3, 20 +; CHECK-NEXT: xor 4, 3, 4 +; CHECK-NEXT: xor 3, 5, 3 +; CHECK-NEXT: and 3, 4, 3 +; CHECK-NEXT: srwi 3, 3, 31 ; CHECK-NEXT: blr entry: %res = call { i32, i1 } 
@llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind @@ -146,11 +143,10 @@ ; CHECK-LABEL: test_ssubo_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: sub 5, 3, 4 -; CHECK-NEXT: cmpdi 1, 4, 0 -; CHECK-NEXT: cmpd 5, 3 -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: creqv 20, 5, 0 -; CHECK-NEXT: isel 3, 0, 3, 20 +; CHECK-NEXT: xor 4, 3, 4 +; CHECK-NEXT: xor 3, 5, 3 +; CHECK-NEXT: and 3, 4, 3 +; CHECK-NEXT: rldicl 3, 3, 1, 63 ; CHECK-NEXT: blr entry: %res = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind