// Patch summary (leading commentary; everything from the first "Index:" header onward is unchanged).
//
// Files touched:
//   * llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
//   * llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
//
// CorrelatedValuePropagation.cpp:
//   * Adds the NumSaturating statistic.
//   * Corrects a comment typo in the with-overflow rewrite ("Constant-holing" -> "Constant-folding").
//   * Adds processSaturatingInst(SaturatingInst *SI): builds a BinaryOperator from SI's binary op,
//     LHS, RHS and name (SI is passed as the final Create argument), copies SI's debug location,
//     sets the no-signed-wrap flag when SI->isSigned() and the no-unsigned-wrap flag otherwise,
//     replaces all uses of SI with the new operator, erases SI, and increments NumSaturating.
//   * In processCallSite: the existing with-overflow rewrite now also requires
//     WO->getLHS()->getType()->isIntegerTy(); a new branch calls processSaturatingInst when
//     SI->getType()->isIntegerTy() && willNotOverflow(SI, LVI) and then returns true.
//
// overflows.ll:
//   * Declares @llvm.uadd.with.overflow.v2i32 and @llvm.uadd.sat.v2i8.
//   * The four existing i8 saturating tests now expect `add nuw`, `add nsw`, `sub nuw` and `sub nsw`
//     in place of the uadd.sat / sadd.sat / usub.sat / ssub.sat calls.
//   * New tests @uaddo_vec and @uadd_sat_vec expect the vector intrinsic calls to be left unchanged.
//
// NOTE(review): this copy of the patch appears to have lost its original line breaks and the
// contents of some angle brackets (e.g. `cl::opt DontAddNoWrapFlags`, `dyn_cast(NewOp)`,
// `SmallVector ArgNos`, `<2 x i32> )`). Presumably template arguments and vector constants were
// stripped during extraction -- confirm against the original review before applying.
Index: llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp =================================================================== --- llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -63,6 +63,8 @@ STATISTIC(NumAShrs, "Number of ashr converted to lshr"); STATISTIC(NumSRems, "Number of srem converted to urem"); STATISTIC(NumOverflows, "Number of overflow checks removed"); +STATISTIC(NumSaturating, + "Number of saturating arithmetics converted to normal arithmetics"); static cl::opt DontAddNoWrapFlags("cvp-dont-add-nowrap-flags", cl::init(true)); @@ -413,7 +415,7 @@ IRBuilder<> B(WO); Value *NewOp = B.CreateBinOp( WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), WO->getName()); - // Constant-holing could have happened. + // Constant-folding could have happened. if (auto *Inst = dyn_cast(NewOp)) { if (WO->isSigned()) Inst->setHasNoSignedWrap(); @@ -428,18 +430,39 @@ ++NumOverflows; } +static void processSaturatingInst(SaturatingInst *SI) { + BinaryOperator *BinOp = BinaryOperator::Create( + SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI); + BinOp->setDebugLoc(SI->getDebugLoc()); + if (SI->isSigned()) + BinOp->setHasNoSignedWrap(); + else + BinOp->setHasNoUnsignedWrap(); + + SI->replaceAllUsesWith(BinOp); + SI->eraseFromParent(); + ++NumSaturating; +} + /// Infer nonnull attributes for the arguments at the specified callsite. 
static bool processCallSite(CallSite CS, LazyValueInfo *LVI) { SmallVector ArgNos; unsigned ArgNo = 0; if (auto *WO = dyn_cast(CS.getInstruction())) { - if (willNotOverflow(WO, LVI)) { + if (WO->getLHS()->getType()->isIntegerTy() && willNotOverflow(WO, LVI)) { processOverflowIntrinsic(WO); return true; } } + if (auto *SI = dyn_cast(CS.getInstruction())) { + if (SI->getType()->isIntegerTy() && willNotOverflow(SI, LVI)) { + processSaturatingInst(SI); + return true; + } + } + // Deopt bundle operands are intended to capture state with minimal // perturbance of the code otherwise. If we can find a constant value for // any such operand and remove a use of the original value, that's Index: llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll =================================================================== --- llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll +++ llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll @@ -21,10 +21,13 @@ declare { i8, i1 } @llvm.umul.with.overflow.i8(i8, i8) +declare { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32>, <2 x i32>) + declare i8 @llvm.uadd.sat.i8(i8, i8) declare i8 @llvm.sadd.sat.i8(i8, i8) declare i8 @llvm.usub.sat.i8(i8, i8) declare i8 @llvm.ssub.sat.i8(i8, i8) +declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>) declare void @llvm.trap() @@ -739,8 +742,8 @@ ; CHECK-NEXT: call void @llvm.trap() ; CHECK-NEXT: unreachable ; CHECK: cont: -; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[X]], i8 100) -; CHECK-NEXT: ret i8 [[RES]] +; CHECK-NEXT: [[RES1:%.*]] = add nuw i8 [[X]], 100 +; CHECK-NEXT: ret i8 [[RES1]] ; %cmp = icmp ugt i8 %x, 100 br i1 %cmp, label %trap, label %cont @@ -762,8 +765,8 @@ ; CHECK-NEXT: call void @llvm.trap() ; CHECK-NEXT: unreachable ; CHECK: cont: -; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X]], i8 20) -; CHECK-NEXT: ret i8 [[RES]] +; CHECK-NEXT: [[RES1:%.*]] = add nsw i8 [[X]], 20 +; CHECK-NEXT: ret i8 [[RES1]] ; %cmp = icmp sgt 
i8 %x, 100 br i1 %cmp, label %trap, label %cont @@ -785,8 +788,8 @@ ; CHECK-NEXT: call void @llvm.trap() ; CHECK-NEXT: unreachable ; CHECK: cont: -; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[X]], i8 100) -; CHECK-NEXT: ret i8 [[RES]] +; CHECK-NEXT: [[RES1:%.*]] = sub nuw i8 [[X]], 100 +; CHECK-NEXT: ret i8 [[RES1]] ; %cmp = icmp ult i8 %x, 100 br i1 %cmp, label %trap, label %cont @@ -808,8 +811,8 @@ ; CHECK-NEXT: call void @llvm.trap() ; CHECK-NEXT: unreachable ; CHECK: cont: -; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X]], i8 20) -; CHECK-NEXT: ret i8 [[RES]] +; CHECK-NEXT: [[RES1:%.*]] = sub nsw i8 [[X]], 20 +; CHECK-NEXT: ret i8 [[RES1]] ; %cmp = icmp slt i8 %x, -100 br i1 %cmp, label %trap, label %cont @@ -822,3 +825,21 @@ %res = call i8 @llvm.ssub.sat.i8(i8 %x, i8 20) ret i8 %res } + +define { <2 x i32>, <2 x i1> } @uaddo_vec(<2 x i32> %a) { +; CHECK-LABEL: @uaddo_vec( +; CHECK-NEXT: [[ADD:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[ADD]] +; + %add = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> ) + ret { <2 x i32>, <2 x i1> } %add +} + +define <2 x i8> @uadd_sat_vec(<2 x i8> %a) { +; CHECK-LABEL: @uadd_sat_vec( +; CHECK-NEXT: [[ADD:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: ret <2 x i8> [[ADD]] +; + %add = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> ) + ret <2 x i8> %add +}