Index: llvm/lib/Analysis/ValueTracking.cpp
===================================================================
--- llvm/lib/Analysis/ValueTracking.cpp
+++ llvm/lib/Analysis/ValueTracking.cpp
@@ -1527,6 +1527,22 @@
             Known.makeNonNegative();
           else if (Known2.isNegative() && Known3.isNegative())
             Known.makeNegative();
+
+          // Try to bound the range of the recurrence using its icmp user.
+          ICmpInst *CmpInst = dyn_cast<ICmpInst>(BO->use_begin()->getUser());
+          if (CmpInst && isa<ConstantInt>(CmpInst->getOperand(1)) &&
+              BO->hasNoSignedWrap() && Known2.isNonNegative()) {
+            KnownBits KnownEnd(BitWidth);
+            computeKnownBits(CmpInst->getOperand(1), KnownEnd, Depth + 1,
+                             RecQ);
+            if (KnownEnd.isNonNegative()) {
+              unsigned FirstZeroHighBit =
+                  BitWidth - std::min(Known2.countMinLeadingZeros(),
+                                      KnownEnd.countMinLeadingZeros());
+              Known.makeNonNegative();
+              Known.Zero.setBitsFrom(FirstZeroHighBit);
+            }
+          }
         }
 
         // (sub nsw non-negative, negative) --> non-negative
Index: llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1720,7 +1720,7 @@
 
 /// Return true if the cast from integer to FP can be proven to be exact for all
 /// possible inputs (the conversion does not lose any precision).
-static bool isKnownExactCastIntToFP(CastInst &I) {
+static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
   CastInst::CastOps Opcode = I.getOpcode();
   assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
          "Unexpected cast");
@@ -1754,6 +1754,14 @@
       return true;
   }
 
+  // If the value is known to need no more than DestNumSigBits significant
+  // bits (i.e. it is less than 2^DestNumSigBits), the cast is exact.
+  KnownBits SrcKnown = llvm::computeKnownBits(Src, IC.getDataLayout());
+  int LowBits =
+      (int)SrcTy->getScalarSizeInBits() - SrcKnown.countMinLeadingZeros();
+  if (LowBits <= DestNumSigBits)
+    return true;
+
   // TODO:
   // Try harder to find if the source integer type has less significant bits.
   // For example, compute number of sign bits or compute low bit mask.
@@ -1937,7 +1945,7 @@
   Value *Src = FPT.getOperand(0);
   if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
     auto *FPCast = cast<CastInst>(Src);
-    if (isKnownExactCastIntToFP(*FPCast))
+    if (isKnownExactCastIntToFP(*FPCast, *this))
       return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
   }
 
@@ -1951,7 +1959,7 @@
   Value *Src = FPExt.getOperand(0);
   if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
     auto *FPCast = cast<CastInst>(Src);
-    if (isKnownExactCastIntToFP(*FPCast))
+    if (isKnownExactCastIntToFP(*FPCast, *this))
       return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
   }
 
@@ -1978,7 +1986,7 @@
 
   // This means this is also safe for a signed input and unsigned output, since
   // a negative input would lead to undefined behavior.
-  if (!isKnownExactCastIntToFP(*OpI)) {
+  if (!isKnownExactCastIntToFP(*OpI, *this)) {
     // The first cast may not round exactly based on the source integer width
     // and FP width, but the overflow UB rules can still allow this to fold.
     // If the destination type is narrow, that means the intermediate FP value
Index: llvm/test/Transforms/InstCombine/fptrunc.ll
===================================================================
--- llvm/test/Transforms/InstCombine/fptrunc.ll
+++ llvm/test/Transforms/InstCombine/fptrunc.ll
@@ -190,3 +190,75 @@
   %r = fptrunc float %x to half
   ret half %r
 }
+
+@array = dso_local global [101 x i32] zeroinitializer, align 4
+; Positive test - the intermediate values stay at or below 16777215 ((1 << 24) - 1).
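+; Every value in that range fits in float's 24-bit significand, so the sitofp/fptosi round trip is exact and can be folded away.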
+define void @foo_max() {
+; CHECK-LABEL: @foo_max(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVAR_PHI:%.*]] = phi i32 [ 16777215, [[ENTRY:%.*]] ], [ [[DEC_INT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[IDX:%.*]] = zext i32 [[INDVAR_PHI]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [101 x i32], [101 x i32]* @array, i64 0, i64 [[IDX]]
+; CHECK-NEXT:    store i32 [[INDVAR_PHI]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[DEC_INT]] = add nsw i32 [[INDVAR_PHI]], -1
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[DEC_INT]], 0
+; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvar.phi = phi i32 [ 16777215, %entry ], [ %dec.int, %for.body ]
+  %indvar.fp = sitofp i32 %indvar.phi to float
+  %indvar.si = fptosi float %indvar.fp to i32
+  %idx = sext i32 %indvar.si to i64
+  %arrayidx = getelementptr inbounds [101 x i32], [101 x i32]* @array, i64 0, i64 %idx
+  store i32 %indvar.si, i32* %arrayidx, align 4
+  %dec.int = add nsw i32 %indvar.phi, -1
+  %cmp = icmp ugt i32 %dec.int, 0
+  br i1 %cmp, label %for.body, label %cleanup
+
+cleanup:                                          ; preds = %for.body
+  ret void
+}
+
+; Negative test - the starting value 16777216 (= 1 << 24) needs 25 significant bits, so the float round trip is not known to be exact.
+define void @float_overflow() {
+; CHECK-LABEL: @float_overflow(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVAR_PHI:%.*]] = phi i32 [ 16777216, [[ENTRY:%.*]] ], [ [[DEC_INT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[INDVAR_FP:%.*]] = sitofp i32 [[INDVAR_PHI]] to float
+; CHECK-NEXT:    [[INDVAR_SI:%.*]] = fptosi float [[INDVAR_FP]] to i32
+; CHECK-NEXT:    [[IDX:%.*]] = sext i32 [[INDVAR_SI]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [101 x i32], [101 x i32]* @array, i64 0, i64 [[IDX]]
+; CHECK-NEXT:    store i32 [[INDVAR_SI]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[DEC_INT]] = add nsw i32 [[INDVAR_PHI]], -1
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[DEC_INT]], 0
+; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvar.phi = phi i32 [ 16777216, %entry ], [ %dec.int, %for.body ]
+  %indvar.fp = sitofp i32 %indvar.phi to float
+  %indvar.si = fptosi float %indvar.fp to i32
+  %idx = sext i32 %indvar.si to i64
+  %arrayidx = getelementptr inbounds [101 x i32], [101 x i32]* @array, i64 0, i64 %idx
+  store i32 %indvar.si, i32* %arrayidx, align 4
+  %dec.int = add nsw i32 %indvar.phi, -1
+  %cmp = icmp ugt i32 %dec.int, 0
+  br i1 %cmp, label %for.body, label %cleanup
+
+cleanup:                                          ; preds = %for.body
+  ret void
+}
Index: llvm/test/Transforms/InstCombine/sitofp.ll
===================================================================
--- llvm/test/Transforms/InstCombine/sitofp.ll
+++ llvm/test/Transforms/InstCombine/sitofp.ll
@@ -223,9 +223,7 @@
 define i25 @masked_input(i25 %A) {
 ; CHECK-LABEL: @masked_input(
 ; CHECK-NEXT:    [[M:%.*]] = and i25 [[A:%.*]], 65535
-; CHECK-NEXT:    [[B:%.*]] = uitofp i25 [[M]] to float
-; CHECK-NEXT:    [[C:%.*]] = fptoui float [[B]] to i25
-; CHECK-NEXT:    ret i25 [[C]]
+; CHECK-NEXT:    ret i25 [[M]]
 ;
   %m = and i25 %A, 65535
   %B = uitofp i25 %m to float
Index: llvm/test/Transforms/LoopVectorize/induction.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/induction.ll
+++ llvm/test/Transforms/LoopVectorize/induction.ll
@@ -5823,7 +5823,7 @@
 ; IND-NEXT:    [[C12:%.*]] = sext i32 [[C11]] to i64
 ; IND-NEXT:    [[C13:%.*]] = add i64 [[C5]], [[C12]]
 ; IND-NEXT:    [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; IND-NEXT:    [[C14:%.*]] = shl i32 [[INDVARS_IV_TR]], 1
+; IND-NEXT:    [[C14:%.*]] = shl nuw nsw i32 [[INDVARS_IV_TR]], 1
 ; IND-NEXT:    [[C15:%.*]] = add i32 [[C9]], [[C14]]
 ; IND-NEXT:    [[C16:%.*]] = sext i32 [[C15]] to i64
 ; IND-NEXT:    [[C23]] = add i64 [[C13]], [[C16]]
@@ -5903,7 +5903,7 @@
 ; UNROLL-NEXT:    [[C12:%.*]] = sext i32 [[C11]] to i64
 ; UNROLL-NEXT:    [[C13:%.*]] = add i64 [[C5]], [[C12]]
 ; UNROLL-NEXT:    [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; UNROLL-NEXT:    [[C14:%.*]] = shl i32 [[INDVARS_IV_TR]], 1
+; UNROLL-NEXT:    [[C14:%.*]] = shl nuw nsw i32 [[INDVARS_IV_TR]], 1
 ; UNROLL-NEXT:    [[C15:%.*]] = add i32 [[C9]], [[C14]]
 ; UNROLL-NEXT:    [[C16:%.*]] = sext i32 [[C15]] to i64
 ; UNROLL-NEXT:    [[C23]] = add i64 [[C13]], [[C16]]
@@ -6065,7 +6065,7 @@
 ; INTERLEAVE-NEXT:    [[C12:%.*]] = sext i32 [[C11]] to i64
 ; INTERLEAVE-NEXT:    [[C13:%.*]] = add i64 [[C5]], [[C12]]
 ; INTERLEAVE-NEXT:    [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; INTERLEAVE-NEXT:    [[C14:%.*]] = shl i32 [[INDVARS_IV_TR]], 1
+; INTERLEAVE-NEXT:    [[C14:%.*]] = shl nuw nsw i32 [[INDVARS_IV_TR]], 1
 ; INTERLEAVE-NEXT:    [[C15:%.*]] = add i32 [[C9]], [[C14]]
 ; INTERLEAVE-NEXT:    [[C16:%.*]] = sext i32 [[C15]] to i64
 ; INTERLEAVE-NEXT:    [[C23]] = add i64 [[C13]], [[C16]]