Index: llvm/include/llvm/IR/PatternMatch.h
===================================================================
--- llvm/include/llvm/IR/PatternMatch.h
+++ llvm/include/llvm/IR/PatternMatch.h
@@ -1525,6 +1525,18 @@
   return CastClass_match<OpTy, Instruction::SIToFP>(Op);
 }
 
+/// Matches FPToUI.
+template <typename OpTy>
+inline CastClass_match<OpTy, Instruction::FPToUI> m_FPToUI(const OpTy &Op) {
+  return CastClass_match<OpTy, Instruction::FPToUI>(Op);
+}
+
+/// Matches FPToSI.
+template <typename OpTy>
+inline CastClass_match<OpTy, Instruction::FPToSI> m_FPToSI(const OpTy &Op) {
+  return CastClass_match<OpTy, Instruction::FPToSI>(Op);
+}
+
 /// Matches FPTrunc
 template <typename OpTy>
 inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) {
Index: llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1735,8 +1735,35 @@
   return nullptr;
 }
 
-Instruction *InstCombiner::visitFPExt(CastInst &CI) {
-  return commonCastTransforms(CI);
+Instruction *InstCombiner::visitFPExt(CastInst &FPExt) {
+  Value *X = FPExt.getOperand(0);
+
+  // If a value is being cast to integer and to the same or wider FP type before
+  // being extended, the extend can be folded into the cast from integer to FP.
+  // The cast from integer to FP must be exact in the intermediate FP type (and
+  // therefore also in the final FP type). If the cast to integer had
+  // overflowed, the result would be poison. Example:
+  // fpext (sitofp (fptosi half F to iN) to float) to double -->
+  // sitofp (fptosi half F to iN) to double
+  Value *I, *F;
+  bool IsSignedPair =
+      match(X, m_SIToFP(m_Value(I))) && match(I, m_FPToSI(m_Value(F)));
+  bool IsUnsignedPair = !IsSignedPair && match(X, m_UIToFP(m_Value(I))) &&
+                        match(I, m_FPToUI(m_Value(F)));
+  if (IsSignedPair || IsUnsignedPair) {
+    // getFPMantissaWidth() returns -1 for a type without a fixed-width
+    // mantissa (ppc_fp128). Such a source would otherwise satisfy the "<="
+    // check against every intermediate type and lose the rounding done by the
+    // int-to-FP cast, so require both widths to be known before comparing.
+    int SrcWidth = F->getType()->getFPMantissaWidth();
+    int MidWidth = X->getType()->getFPMantissaWidth();
+    if (SrcWidth > 0 && MidWidth > 0 && SrcWidth <= MidWidth)
+      return CastInst::Create(
+          IsSignedPair ? CastInst::SIToFP : CastInst::UIToFP, I,
+          FPExt.getType());
+  }
+
+  return commonCastTransforms(FPExt);
 }
 
 // fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
Index: llvm/test/Transforms/InstCombine/fpextend.ll
===================================================================
--- llvm/test/Transforms/InstCombine/fpextend.ll
+++ llvm/test/Transforms/InstCombine/fpextend.ll
@@ -262,8 +262,7 @@
 define double @FtoItoFtoF_f32_s32_f32_f64(float %f) {
 ; CHECK-LABEL: @FtoItoFtoF_f32_s32_f32_f64(
 ; CHECK-NEXT:    [[I:%.*]] = fptosi float [[F:%.*]] to i32
-; CHECK-NEXT:    [[X:%.*]] = sitofp i32 [[I]] to float
-; CHECK-NEXT:    [[R:%.*]] = fpext float [[X]] to double
+; CHECK-NEXT:    [[R:%.*]] = sitofp i32 [[I]] to double
 ; CHECK-NEXT:    ret double [[R]]
 ;
   %i = fptosi float %f to i32
@@ -283,7 +282,7 @@
 ; CHECK-NEXT:    call void @use_i32(i32 [[I]])
 ; CHECK-NEXT:    [[X:%.*]] = uitofp i32 [[I]] to float
 ; CHECK-NEXT:    call void @use_f32(float [[X]])
-; CHECK-NEXT:    [[R:%.*]] = fpext float [[X]] to double
+; CHECK-NEXT:    [[R:%.*]] = uitofp i32 [[I]] to double
 ; CHECK-NEXT:    ret double [[R]]
 ;
   %i = fptoui float %f to i32
@@ -299,8 +298,7 @@
 define <3 x double> @FtoItoFtoF_v3f16_v3s32_v3f32_v3f64(<3 x half> %f) {
 ; CHECK-LABEL: @FtoItoFtoF_v3f16_v3s32_v3f32_v3f64(
 ; CHECK-NEXT:    [[I:%.*]] = fptosi <3 x half> [[F:%.*]] to <3 x i32>
-; CHECK-NEXT:    [[X:%.*]] = sitofp <3 x i32> [[I]] to <3 x float>
-; CHECK-NEXT:    [[R:%.*]] = fpext <3 x float> [[X]] to <3 x double>
+; CHECK-NEXT:    [[R:%.*]] = sitofp <3 x i32> [[I]] to <3 x double>
 ; CHECK-NEXT:    ret <3 x double> [[R]]
 ;
   %i = fptosi <3 x half> %f to <3 x i32>
@@ -309,11 +307,12 @@
   ret <3 x double> %r
 }
 
+; Wider than double is ok.
+
 define fp128 @FtoItoFtoF_f32_s64_f64_f128(float %f) {
 ; CHECK-LABEL: @FtoItoFtoF_f32_s64_f64_f128(
 ; CHECK-NEXT:    [[I:%.*]] = fptosi float [[F:%.*]] to i64
-; CHECK-NEXT:    [[X:%.*]] = sitofp i64 [[I]] to double
-; CHECK-NEXT:    [[R:%.*]] = fpext double [[X]] to fp128
+; CHECK-NEXT:    [[R:%.*]] = sitofp i64 [[I]] to fp128
 ; CHECK-NEXT:    ret fp128 [[R]]
 ;
   %i = fptosi float %f to i64
@@ -322,11 +321,12 @@
   ret fp128 %r
 }
 
+; Target-specific type is ok.
+
 define x86_fp80 @FtoItoFtoF_f64_u32_f64_f80(double %f) {
 ; CHECK-LABEL: @FtoItoFtoF_f64_u32_f64_f80(
 ; CHECK-NEXT:    [[I:%.*]] = fptoui double [[F:%.*]] to i32
-; CHECK-NEXT:    [[X:%.*]] = uitofp i32 [[I]] to double
-; CHECK-NEXT:    [[R:%.*]] = fpext double [[X]] to x86_fp80
+; CHECK-NEXT:    [[R:%.*]] = uitofp i32 [[I]] to x86_fp80
 ; CHECK-NEXT:    ret x86_fp80 [[R]]
 ;
   %i = fptoui double %f to i32
@@ -335,11 +335,12 @@
   ret x86_fp80 %r
 }
 
+; Weird target-specific type is ok (not possible to extend *from* that type).
+
 define ppc_fp128 @FtoItoFtoF_f64_u32_f64_p128(double %f) {
 ; CHECK-LABEL: @FtoItoFtoF_f64_u32_f64_p128(
 ; CHECK-NEXT:    [[I:%.*]] = fptoui double [[F:%.*]] to i32
-; CHECK-NEXT:    [[X:%.*]] = uitofp i32 [[I]] to double
-; CHECK-NEXT:    [[R:%.*]] = fpext double [[X]] to ppc_fp128
+; CHECK-NEXT:    [[R:%.*]] = uitofp i32 [[I]] to ppc_fp128
 ; CHECK-NEXT:    ret ppc_fp128 [[R]]
 ;
   %i = fptoui double %f to i32
@@ -348,6 +349,8 @@
   ret ppc_fp128 %r
 }
 
+; TODO: unsigned to signed is ok because signed int has smaller magnitude.
+
 define double @FtoItoFtoF_f32_us32_f32_f64(float %f) {
 ; CHECK-LABEL: @FtoItoFtoF_f32_us32_f32_f64(
 ; CHECK-NEXT:    [[I:%.*]] = fptoui float [[F:%.*]] to i32
@@ -361,6 +364,8 @@
   ret double %r
 }
 
+; Negative test: consider -1.0
+
 define double @FtoItoFtoF_f32_su32_f32_f64(float %f) {
 ; CHECK-LABEL: @FtoItoFtoF_f32_su32_f32_f64(
 ; CHECK-NEXT:    [[I:%.*]] = fptosi float [[F:%.*]] to i32