Index: llvm/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1123,8 +1123,27 @@ ExtendKind ExtKind = getExtendKind(NarrowDef); bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap(); bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap(); - if (!CanSignExtend && !CanZeroExtend) - return false; + auto AnotherOpExtKind = ExtKind; + if (!CanSignExtend && !CanZeroExtend) { + // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we + // will most likely not see it. Let's try to prove it. + if (OpCode != Instruction::Add) + return false; + if (ExtKind != ZeroExtended) + return false; + const SCEV *LHS = SE->getSCEV(OBO->getOperand(0)); + const SCEV *RHS = SE->getSCEV(OBO->getOperand(1)); + auto GEPred = ICmpInst::ICMP_UGE; + if (!SE->isKnownNegative(RHS)) + return false; + bool ProvedSubNUW = SE->isKnownPredicateAt( + GEPred, LHS, SE->getNegativeSCEV(RHS), NarrowUse); + if (!ProvedSubNUW) + return false; + // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as + // neg(zext(neg(op))), which is basically sext(op). + AnotherOpExtKind = SignExtended; + } // Verifying that Defining operand is an AddRec const SCEV *Op1 = SE->getSCEV(WideDef); @@ -1132,14 +1151,20 @@ if (!AddRecOp1 || AddRecOp1->getLoop() != L) return false; + // Check that all uses are either s/zext, or narrow def (in case of we are + // widening the IV increment). if (ExtKind == SignExtended) { for (Use &U : NarrowUse->uses()) { + if (U.getUser() == NarrowDef) + continue; SExtInst *User = dyn_cast(U.getUser()); if (!User || User->getType() != WideType) return false; } } else { // ExtKind == ZeroExtended for (Use &U : NarrowUse->uses()) { + if (U.getUser() == NarrowDef) + continue; ZExtInst *User = dyn_cast(U.getUser()); if (!User || User->getType() != WideType) return false; @@ -1152,11 +1177,11 @@ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef : createExtendInst(NarrowUse->getOperand(0), WideType, - ExtKind, NarrowUse); + AnotherOpExtKind, NarrowUse); Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) ? WideDef : createExtendInst(NarrowUse->getOperand(1), WideType, - ExtKind, NarrowUse); + AnotherOpExtKind, NarrowUse); auto *NarrowBO = cast(NarrowUse); auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, @@ -1167,6 +1192,9 @@ ExtendKindMap[NarrowUse] = ExtKind; for (Use &U : NarrowUse->uses()) { + // Ignore narrow def: it will be removed after the transform. + if (U.getUser() == NarrowDef) + continue; Instruction *User = nullptr; if (ExtKind == SignExtended) User = cast(U.getUser()); Index: llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll +++ llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -indvars -S | FileCheck %s +; RUN: opt < %s -indvars -scalar-evolution-use-expensive-range-sharpening -S | FileCheck %s target triple = "aarch64--linux-gnu" ; Provide legal integer types. @@ -607,13 +607,10 @@ ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 ; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[BACKEDGE]] ; CHECK: backedge: -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[IV_NEXT:%.*]] = add i32 [[TMP1]], -1 -; CHECK-NEXT: [[INDEX:%.*]] = zext i32 [[IV_NEXT]] to i64 -; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[TMP1]] ; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4 -; CHECK-NEXT: [[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q]], align 4 +; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q:%.*]], align 4 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]]