diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -67,10 +67,15 @@ STATISTIC(NumInstRemoved, "Number of instructions removed"); STATISTIC(NumDeadBlocks , "Number of basic blocks unreachable"); +STATISTIC(NumInstReplaced, + "Number of instructions replaced with (simpler) instruction"); STATISTIC(IPNumInstRemoved, "Number of instructions removed by IPSCCP"); STATISTIC(IPNumArgsElimed ,"Number of arguments constant propagated by IPSCCP"); STATISTIC(IPNumGlobalConst, "Number of globals found to be constant by IPSCCP"); +STATISTIC( + IPNumInstReplaced, + "Number of instructions replaced with (simpler) instruction by IPSCCP"); // The maximum number of range extensions allowed for operations requiring // widening. @@ -1296,10 +1301,6 @@ NewCR = NewCR.intersectWith(OriginalCR); addAdditionalUser(CmpOp1, &CB); - // TODO: Actually filp MayIncludeUndef for the created range to false, - // once most places in the optimizer respect the branches on - // undef/poison are UB rule. The reason why the new range cannot be - // undef is as follows below: // The new range is based on a branch condition. That guarantees that // neither of the compare operands can be undef in the branch targets, // unless we have conditions that are always true/false (e.g. icmp ule @@ -1307,7 +1308,7 @@ // inferred, but the branch will get folded accordingly anyways. 
mergeInValue( IV, &CB, - ValueLatticeElement::getRange(NewCR, /*MayIncludeUndef=*/true)); + ValueLatticeElement::getRange(NewCR, /*MayIncludeUndef=*/false)); return; } else if (Pred == CmpInst::ICMP_EQ && CondVal.isConstant()) { // For non-integer values or integer constant expressions, only @@ -1607,7 +1608,9 @@ } static bool simplifyInstsInBlock(SCCPSolver &Solver, BasicBlock &BB, - Statistic &InstRemovedStat) { + SmallPtrSetImpl<Value *> &InsertedValues, + Statistic &InstRemovedStat, + Statistic &InstReplacedStat) { bool MadeChanges = false; for (Instruction &Inst : make_early_inc_range(BB)) { if (Inst.getType()->isVoidTy()) @@ -1618,6 +1621,21 @@ // Hey, we just changed something! MadeChanges = true; ++InstRemovedStat; + } else if (isa<SExtInst>(&Inst)) { + Value *ExtOp = Inst.getOperand(0); + if (isa<Constant>(ExtOp) || InsertedValues.count(ExtOp)) + continue; + const ValueLatticeElement &IV = Solver.getLatticeValueFor(ExtOp); + if (!IV.isConstantRange(/*UndefAllowed=*/false)) + continue; + if (IV.getConstantRange().isAllNonNegative()) { + auto *ZExt = new ZExtInst(ExtOp, Inst.getType(), "", &Inst); + InsertedValues.insert(ZExt); + Inst.replaceAllUsesWith(ZExt); + Inst.eraseFromParent(); + InstReplacedStat++; + MadeChanges = true; + } } } return MadeChanges; @@ -1653,6 +1671,7 @@ // delete their contents now. Note that we cannot actually delete the blocks, // as we cannot modify the CFG of the function. 
+ SmallPtrSet<Value *, 32> InsertedValues; for (BasicBlock &BB : F) { if (!Solver.isBlockExecutable(&BB)) { LLVM_DEBUG(dbgs() << " BasicBlock Dead:" << BB); @@ -1664,7 +1683,8 @@ continue; } - MadeChanges |= simplifyInstsInBlock(Solver, BB, NumInstRemoved); + MadeChanges |= simplifyInstsInBlock(Solver, BB, InsertedValues, + NumInstRemoved, NumInstReplaced); } return MadeChanges; @@ -1893,6 +1913,7 @@ } } + SmallPtrSet<Value *, 32> InsertedValues; for (BasicBlock &BB : F) { if (!Solver.isBlockExecutable(&BB)) { LLVM_DEBUG(dbgs() << " BasicBlock Dead:" << BB); @@ -1905,7 +1926,8 @@ continue; } - MadeChanges |= simplifyInstsInBlock(Solver, BB, IPNumInstRemoved); + MadeChanges |= simplifyInstsInBlock(Solver, BB, InsertedValues, + IPNumInstRemoved, IPNumInstReplaced); } DomTreeUpdater DTU = Solver.getDTU(F); diff --git a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll --- a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll +++ b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll @@ -109,9 +109,9 @@ ; x = [100, 301) define internal i1 @f.sext(i32 %x, i32 %y) { ; CHECK-LABEL: define internal i1 @f.sext(i32 %x, i32 %y) { -; CHECK-NEXT: %t.1 = sext i32 %x to i64 -; CHECK-NEXT: %c.2 = icmp sgt i64 %t.1, 299 -; CHECK-NEXT: %c.4 = icmp slt i64 %t.1, 101 +; CHECK-NEXT: [[T_1:%.*]] = zext i32 %x to i64 +; CHECK-NEXT: %c.2 = icmp sgt i64 [[T_1]], 299 +; CHECK-NEXT: %c.4 = icmp slt i64 [[T_1]], 101 ; CHECK-NEXT: %res.1 = add i1 false, %c.2 ; CHECK-NEXT: %res.2 = add i1 %res.1, false ; CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 diff --git a/llvm/test/Transforms/SCCP/ip-ranges-sext.ll b/llvm/test/Transforms/SCCP/ip-ranges-sext.ll --- a/llvm/test/Transforms/SCCP/ip-ranges-sext.ll +++ b/llvm/test/Transforms/SCCP/ip-ranges-sext.ll @@ -6,8 +6,8 @@ ; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[X:%.*]], 0 ; CHECK-NEXT: br i1 [[C]], label [[TRUE:%.*]], label [[FALSE:%.*]] ; CHECK: true: -; CHECK-NEXT: [[EXT_1:%.*]] = sext i32 [[X]] to i64 -; CHECK-NEXT: ret i64 [[EXT_1]] +; CHECK-NEXT: 
[[TMP1:%.*]] = zext i32 [[X]] to i64 +; CHECK-NEXT: ret i64 [[TMP1]] ; CHECK: false: ; CHECK-NEXT: [[EXT_2:%.*]] = sext i32 [[X]] to i64 ; CHECK-NEXT: ret i64 [[EXT_2]] @@ -29,8 +29,8 @@ ; CHECK-NEXT: [[C:%.*]] = icmp sge i32 [[X:%.*]], 0 ; CHECK-NEXT: br i1 [[C]], label [[TRUE:%.*]], label [[FALSE:%.*]] ; CHECK: true: -; CHECK-NEXT: [[EXT_1:%.*]] = sext i32 [[X]] to i64 -; CHECK-NEXT: ret i64 [[EXT_1]] +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[X]] to i64 +; CHECK-NEXT: ret i64 [[TMP1]] ; CHECK: false: ; CHECK-NEXT: [[EXT_2:%.*]] = sext i32 [[X]] to i64 ; CHECK-NEXT: ret i64 [[EXT_2]] @@ -105,8 +105,8 @@ define i64 @test5(i32 %x) { ; CHECK-LABEL: @test5( ; CHECK-NEXT: [[P:%.*]] = and i32 [[X:%.*]], 15 -; CHECK-NEXT: [[EXT:%.*]] = sext i32 [[P]] to i64 -; CHECK-NEXT: ret i64 [[EXT]] +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[P]] to i64 +; CHECK-NEXT: ret i64 [[TMP1]] ; %p = and i32 %x, 15 %ext = sext i32 %p to i64 @@ -126,8 +126,8 @@ define i64 @test7(i16 %x) { ; CHECK-LABEL: @test7( ; CHECK-NEXT: [[P:%.*]] = and i16 [[X:%.*]], 15 -; CHECK-NEXT: [[EXT_1:%.*]] = sext i16 [[P]] to i32 -; CHECK-NEXT: [[EXT_2:%.*]] = sext i32 [[EXT_1]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P]] to i32 +; CHECK-NEXT: [[EXT_2:%.*]] = sext i32 [[TMP1]] to i64 ; CHECK-NEXT: ret i64 [[EXT_2]] ; %p = and i16 %x, 15 diff --git a/llvm/test/Transforms/SCCP/ranges-sext.ll b/llvm/test/Transforms/SCCP/ranges-sext.ll --- a/llvm/test/Transforms/SCCP/ranges-sext.ll +++ b/llvm/test/Transforms/SCCP/ranges-sext.ll @@ -35,8 +35,8 @@ define i64 @test2(i32 %x) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: [[P:%.*]] = and i32 [[X:%.*]], 15 -; CHECK-NEXT: [[EXT:%.*]] = sext i32 [[P]] to i64 -; CHECK-NEXT: ret i64 [[EXT]] +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[P]] to i64 +; CHECK-NEXT: ret i64 [[TMP1]] ; %p = and i32 %x, 15 %ext = sext i32 %p to i64 @@ -54,8 +54,8 @@ ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[TRUE_1]] ], [ 1, [[TRUE_2]] ], [ 3, [[FALSE]] ] -; 
CHECK-NEXT: [[EXT:%.*]] = sext i32 [[P]] to i64 -; CHECK-NEXT: ret i64 [[EXT]] +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[P]] to i64 +; CHECK-NEXT: ret i64 [[TMP1]] ; br i1 %c.1, label %true.1, label %false diff --git a/llvm/test/Transforms/SCCP/widening.ll b/llvm/test/Transforms/SCCP/widening.ll --- a/llvm/test/Transforms/SCCP/widening.ll +++ b/llvm/test/Transforms/SCCP/widening.ll @@ -451,7 +451,7 @@ ; SCCP-NEXT: [[TMP7:%.*]] = sub i64 3, [[TMP6]] ; SCCP-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 1 ; SCCP-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32 -; SCCP-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 +; SCCP-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 ; SCCP-NEXT: br label [[BB11:%.*]] ; SCCP: bb11: ; SCCP-NEXT: [[TMP12:%.*]] = phi i64 [ [[TMP10]], [[BB4]] ], [ [[TMP17:%.*]], [[BB18:%.*]] ] @@ -489,7 +489,7 @@ ; IPSCCP-NEXT: [[TMP7:%.*]] = sub i64 3, [[TMP6]] ; IPSCCP-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 1 ; IPSCCP-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32 -; IPSCCP-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 +; IPSCCP-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 ; IPSCCP-NEXT: br label [[BB11:%.*]] ; IPSCCP: bb11: ; IPSCCP-NEXT: [[TMP12:%.*]] = phi i64 [ [[TMP10]], [[BB4]] ], [ [[TMP17:%.*]], [[BB18:%.*]] ]