// NOTE(review): This text is a git patch (unified diff), not a C++ translation
// unit. Hunk-internal line breaks and indentation appear to have been collapsed
// ('+'/'-' markers show up mid-line), so it will not apply as-is -- TODO restore
// it from the original patch before use.
//
// What the C++ part of the patch does, as far as the visible text shows:
//   * llvm/include/llvm/ADT/APFloat.h: declares the static helper
//     semanticsIntSizeInBits(const fltSemantics&, bool isSigned).
//   * llvm/lib/Support/APFloat.cpp: defines that helper as
//     semanticsMaxExponent(semantics) + 1, plus one extra (sign) bit when
//     isSigned is true.
//   * llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp: replaces the
//     open-coded computation with the helper. The old test was
//     "size > MaxExponent (+1 for FPToSI)"; the new test is
//     "size >= MaxExponent + 1 (+1 for FPToSI)", which is the same condition
//     for integer widths.
//   * llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp: the call to
//     isSaturatingMinMax gains a trailing DAG argument, and an added path
//     returns N0 with Unsigned = true and BW = PowerOf2Ceil(MinBitWidth) when
//     IntVT is at least MinBitWidth bits wide. The conditions that guard this
//     path are not visible in this text.
//
// NOTE(review): the first DAGCombiner.cpp hunk looks truncated. Its header
// says "-5108,7 +5108,7" although more than seven '+' lines follow, the next
// hunk's offset (5194 -> 5214) implies 20 net added lines, and the text jumps
// from "N0" straight to "getScalarType();". Presumably everything between a
// '<' and a later '>' was dropped during extraction -- confirm against the
// original patch; the matching isSaturatingMinMax signature change is also
// not visible here.
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -246,6 +246,8 @@ static ExponentType semanticsMinExponent(const fltSemantics &); static ExponentType semanticsMaxExponent(const fltSemantics &); static unsigned int semanticsSizeInBits(const fltSemantics &); + static unsigned int semanticsIntSizeInBits(const fltSemantics&, + bool isSigned); /// Returns the size of the floating point number (in bits) in the given /// semantics. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5108,7 +5108,7 @@ // same as SimplifySelectCC. N0getScalarType(); + const fltSemantics &Semantics = InputTy->getFltSemantics(); + uint32_t MinBitWidth = + APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true); + if (IntVT.getSizeInBits() >= MinBitWidth) { + Unsigned = true; + BW = PowerOf2Ceil(MinBitWidth); + return N0; + } + } + } + } + SDValue N00, N01, N02, N03; ISD::CondCode N0CC; switch (N0.getOpcode()) { @@ -5194,7 +5214,7 @@ SelectionDAG &DAG) { unsigned BW; bool Unsigned; - SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned); + SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG); if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT) return SDValue(); EVT FPVT = Fp.getOperand(0).getValueType(); diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -251,6 +251,16 @@ unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { return semantics.sizeInBits; } + unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics, + bool isSigned) { + // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need + // at least one more bit than the 
MaxExponent to hold the max FP value. + unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1; + // Extra sign bit needed. + if (isSigned) + ++MinBitWidth; + return MinBitWidth; + } unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { return Sem.sizeInBits; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -501,16 +501,12 @@ // If the integer type can hold the max FP value, it is safe to cast // directly to that type. Otherwise, we may create poison via overflow // that did not exist in the original code. - // - // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need - // at least one more bit than the MaxExponent to hold the max FP value. Type *InputTy = I->getOperand(0)->getType()->getScalarType(); const fltSemantics &Semantics = InputTy->getFltSemantics(); - uint32_t MinBitWidth = APFloatBase::semanticsMaxExponent(Semantics); - // Extra sign bit needed. - if (I->getOpcode() == Instruction::FPToSI) - ++MinBitWidth; - return Ty->getScalarSizeInBits() > MinBitWidth; + uint32_t MinBitWidth = + APFloatBase::semanticsIntSizeInBits(Semantics, + I->getOpcode() == Instruction::FPToSI); + return Ty->getScalarSizeInBits() >= MinBitWidth; } default: // TODO: Can handle more cases here. 
// NOTE(review): The text below is a git diff of a FileCheck test
// (llvm/test/CodeGen/WebAssembly/fpclamptosat.ll) whose hunk-internal line
// breaks appear to have been collapsed -- TODO restore before applying.
//
// Visible expectation changes: in ustest_f16i32_cse, ustest_f16i16_cse,
// ustest_f16i32_mm_cse and ustest_f16i16_mm_cse the previously expected
// sequence (an extra .local, i64/i32.trunc_sat_f32_s, local.tee, compare
// against constant 0 with gt_s, select, and for the i64 variants
// i32.wrap_i64) is replaced by a single "i32.trunc_sat_f32_u".
//
// The hunk "@@ -607,7 +592,6 @@" removes one line just before
// "define i64 @ustest_f64i64"; it looks like a blank line, but whitespace was
// lost here, so confirm against the original patch.
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll @@ -191,19 +191,11 @@ define i32 @ustest_f16i32_cse(half %x) { ; CHECK-LABEL: ustest_f16i32_cse: ; CHECK: .functype ustest_f16i32_cse (f32) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi half %x to i64 @@ -485,18 +477,11 @@ define i16 @ustest_f16i16_cse(half %x) { ; CHECK-LABEL: ustest_f16i16_cse: ; CHECK: .functype ustest_f16i16_cse (f32) -> (i32) -; CHECK-NEXT: .local i32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: i32.gt_s -; CHECK-NEXT: i32.select +; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi half %x to i32 @@ -607,7 +592,6 @@ ret i64 %conv6 } - define i64 @ustest_f64i64(double %x) { ; CHECK-LABEL: ustest_f64i64: ; CHECK: .functype ustest_f64i64 (f64) -> (i64) @@ -1214,19 +1198,11 @@ define i32 @ustest_f16i32_mm_cse(half %x) { ; CHECK-LABEL: ustest_f16i32_mm_cse: ; CHECK: .functype ustest_f16i32_mm_cse (f32) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 0 -; 
CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi half %x to i64 @@ -1477,18 +1453,11 @@ define i16 @ustest_f16i16_mm_cse(half %x) { ; CHECK-LABEL: ustest_f16i16_mm_cse: ; CHECK: .functype ustest_f16i16_mm_cse (f32) -> (i32) -; CHECK-NEXT: .local i32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: i32.gt_s -; CHECK-NEXT: i32.select +; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi half %x to i32