Index: llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -71,6 +71,7 @@
 
   bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
 
+  bool eliminateOverflowIntrinsic(CallInst *CI);
   bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
   void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
   void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
@@ -318,6 +319,127 @@
   DeadInsts.emplace_back(Rem);
 }
 
+/// Try to replace a call to one of the signed/unsigned add/sub
+/// "with.overflow" intrinsics by a plain add or sub carrying nsw or nuw.
+///
+/// The rewrite fires only when SCEV produces the same expression for
+/// ext(LHS op RHS) and for ext(LHS) op ext(RHS) at twice the operand width,
+/// where ext is sign extension for the signed intrinsics and zero extension
+/// for the unsigned ones.
+///
+/// On success each extractvalue user of \p CI has its uses replaced (index 0
+/// by the new binary operator, index 1 by the constant false) and is erased;
+/// \p CI itself is erased if that leaves it without uses.
+///
+/// \returns true if the call was rewritten; false if \p CI has no called
+/// function, is not one of the four handled intrinsics, or the proof failed.
+bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
+  auto *F = CI->getCalledFunction();
+  if (!F)
+    return false;
+
+  typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)(
+      const SCEV *, const SCEV *, SCEV::NoWrapFlags);
+  typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)(
+      const SCEV *, Type *);
+
+  OperationFunctionTy Operation;
+  ExtensionFunctionTy Extension;
+
+  Instruction::BinaryOps RawOp;
+
+  // We always have exactly one of nsw or nuw. If NoSignedOverflow is false, we
+  // have nuw.
+  bool NoSignedOverflow;
+
+  switch (F->getIntrinsicID()) {
+  default:
+    return false;
+
+  case Intrinsic::sadd_with_overflow:
+    Operation = &ScalarEvolution::getAddExpr;
+    Extension = &ScalarEvolution::getSignExtendExpr;
+    RawOp = Instruction::Add;
+    NoSignedOverflow = true;
+    break;
+
+  case Intrinsic::uadd_with_overflow:
+    Operation = &ScalarEvolution::getAddExpr;
+    Extension = &ScalarEvolution::getZeroExtendExpr;
+    RawOp = Instruction::Add;
+    NoSignedOverflow = false;
+    break;
+
+  case Intrinsic::ssub_with_overflow:
+    Operation = &ScalarEvolution::getMinusSCEV;
+    Extension = &ScalarEvolution::getSignExtendExpr;
+    RawOp = Instruction::Sub;
+    NoSignedOverflow = true;
+    break;
+
+  case Intrinsic::usub_with_overflow:
+    Operation = &ScalarEvolution::getMinusSCEV;
+    Extension = &ScalarEvolution::getZeroExtendExpr;
+    RawOp = Instruction::Sub;
+    NoSignedOverflow = false;
+    break;
+  }
+
+  const SCEV *LHS = SE->getSCEV(CI->getArgOperand(0));
+  const SCEV *RHS = SE->getSCEV(CI->getArgOperand(1));
+
+  auto *NarrowTy = cast<IntegerType>(LHS->getType());
+  auto *WideTy =
+      IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
+
+  // A extends the narrow result; B performs the operation on the extended
+  // operands. The transform fires only when SCEV gives the same expression
+  // for both.
+  const SCEV *A =
+      (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap), WideTy);
+  const SCEV *B =
+      (SE->*Operation)((SE->*Extension)(LHS, WideTy),
+                       (SE->*Extension)(RHS, WideTy), SCEV::FlagAnyWrap);
+
+  if (A != B)
+    return false;
+
+  // Proved no overflow, nuke the overflow check and, if possible, the overflow
+  // intrinsic as well.
+
+  BinaryOperator *NewResult = BinaryOperator::Create(
+      RawOp, CI->getArgOperand(0), CI->getArgOperand(1), "", CI);
+
+  if (NoSignedOverflow)
+    NewResult->setHasNoSignedWrap(true);
+  else
+    NewResult->setHasNoUnsignedWrap(true);
+
+  // Rewrite the uses of each extractvalue now, but erase the extractvalues
+  // only after the walk over CI->users() has finished.
+  SmallVector<ExtractValueInst *, 4> ToDelete;
+
+  for (auto *U : CI->users()) {
+    if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
+      if (EVI->getIndices()[0] == 1)
+        EVI->replaceAllUsesWith(ConstantInt::getFalse(CI->getContext()));
+      else {
+        assert(EVI->getIndices()[0] == 0 && "Only two possibilities!");
+        EVI->replaceAllUsesWith(NewResult);
+      }
+      ToDelete.push_back(EVI);
+    }
+  }
+
+  for (auto *EVI : ToDelete)
+    EVI->eraseFromParent();
+
+  if (CI->use_empty())
+    CI->eraseFromParent();
+
+  return true;
+}
+
 /// Eliminate an operation that consumes a simple IV and has no observable
 /// side-effect given the range of IV values. IVOperand is guaranteed SCEVable,
 /// but UseInst may not be.
@@ -335,6 +457,10 @@
     }
   }
 
+  if (auto *CI = dyn_cast<CallInst>(UseInst))
+    if (eliminateOverflowIntrinsic(CI))
+      return true;
+
   if (eliminateIdentitySCEV(UseInst, IVOperand))
     return true;
 
Index: llvm/trunk/test/Transforms/IndVarSimplify/overflow-intrinsics.ll
===================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/overflow-intrinsics.ll
+++ llvm/trunk/test/Transforms/IndVarSimplify/overflow-intrinsics.ll
@@ -0,0 +1,137 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f_sadd(i8* %a) {
+; CHECK-LABEL: @f_sadd(
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+
+for.body:                                         ; preds = %entry, %cont
+  %i.04 = phi i32 [ 0, %entry ], [ %2, %cont ]
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1)
+  %1 = extractvalue { i32, i1 } %0, 1
+; CHECK: for.body:
+; CHECK-NOT: @llvm.sadd.with.overflow
+; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
+  br i1 %1, label %trap, label %cont, !nosanitize !{}
+
+trap:                                             ; preds = %for.body
+  tail call void @llvm.trap() #2, !nosanitize !{}
+  unreachable, !nosanitize !{}
+
+cont:                                             ; preds = %for.body
+  %2 = extractvalue { i32, i1 } %0, 0
+  %cmp = icmp slt i32 %2, 16
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+define void @f_uadd(i8* %a) {
+; CHECK-LABEL: @f_uadd(
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+
+for.body:                                         ; preds = %entry, %cont
+  %i.04 = phi i32 [ 0, %entry ], [ %2, %cont ]
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %i.04, i32 1)
+  %1 = extractvalue { i32, i1 } %0, 1
+; CHECK: for.body:
+; CHECK-NOT: @llvm.uadd.with.overflow
+; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
+  br i1 %1, label %trap, label %cont, !nosanitize !{}
+
+trap:                                             ; preds = %for.body
+  tail call void @llvm.trap(), !nosanitize !{}
+  unreachable, !nosanitize !{}
+
+cont:                                             ; preds = %for.body
+  %2 = extractvalue { i32, i1 } %0, 0
+  %cmp = icmp slt i32 %2, 16
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+define void @f_ssub(i8* nocapture %a) {
+; CHECK-LABEL: @f_ssub(
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+
+for.body:                                         ; preds = %entry, %cont
+  %i.04 = phi i32 [ 15, %entry ], [ %2, %cont ]
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %i.04, i32 1)
+  %1 = extractvalue { i32, i1 } %0, 1
+; CHECK: for.body:
+; CHECK-NOT: @llvm.ssub.with.overflow.i32
+; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
+  br i1 %1, label %trap, label %cont, !nosanitize !{}
+
+trap:                                             ; preds = %for.body
+  tail call void @llvm.trap(), !nosanitize !{}
+  unreachable, !nosanitize !{}
+
+cont:                                             ; preds = %for.body
+  %2 = extractvalue { i32, i1 } %0, 0
+  %cmp = icmp sgt i32 %2, -1
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+define void @f_usub(i8* nocapture %a) {
+; CHECK-LABEL: @f_usub(
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+
+for.body:                                         ; preds = %entry, %cont
+  %i.04 = phi i32 [ 15, %entry ], [ %2, %cont ]
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %i.04, i32 1)
+  %1 = extractvalue { i32, i1 } %0, 1
+
+; It is theoretically possible to prove this, but SCEV cannot
+; represent non-unsigned-wrapping subtraction operations.
+
+; CHECK: for.body:
+; CHECK: [[COND:%[^ ]+]] = extractvalue { i32, i1 } %1, 1
+; CHECK-NEXT: br i1 [[COND]], label %trap, label %cont, !nosanitize !0
+  br i1 %1, label %trap, label %cont, !nosanitize !{}
+
+trap:                                             ; preds = %for.body
+  tail call void @llvm.trap(), !nosanitize !{}
+  unreachable, !nosanitize !{}
+
+cont:                                             ; preds = %for.body
+  %2 = extractvalue { i32, i1 } %0, 0
+  %cmp = icmp sgt i32 %2, -1
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
+
+declare void @llvm.trap() #2
Index: llvm/trunk/test/Transforms/IndVarSimplify/overflowcheck.ll
===================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/overflowcheck.ll
+++ llvm/trunk/test/Transforms/IndVarSimplify/overflowcheck.ll
@@ -10,7 +10,7 @@
 ; CHECK-LABEL: loop2:
 ; CHECK-NOT: extractvalue
 ; CHECK: add nuw
-; CHECK: @llvm.sadd.with.overflow
+; CHECK-NOT: @llvm.sadd.with.overflow
 ; CHECK-LABEL: loop3:
 ; CHECK-NOT: extractvalue
 ; CHECK: ret