diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5385,8 +5385,12 @@
   EVT VT = Op.getValueType();
   const SDNodeFlags Flags = Op->getFlags();
   const TargetOptions &Options = DAG.getTarget().Options;
-  if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND &&
-                           isFPExtFree(VT, Op.getOperand(0).getValueType())))
+  // A multi-use node is rejected here unless it is an FP_EXTEND accepted by
+  // isFPExtFree() or a floating-point constant.
+  if (!Op.hasOneUse() &&
+      !(Op.getOpcode() == ISD::FP_EXTEND &&
+        isFPExtFree(VT, Op.getOperand(0).getValueType())) &&
+      Op.getOpcode() != ISD::ConstantFP)
     return 0;
 
   // Don't recurse exponentially.
diff --git a/llvm/test/CodeGen/AMDGPU/const-multiuse-tl.ll b/llvm/test/CodeGen/AMDGPU/const-multiuse-tl.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/const-multiuse-tl.ll
@@ -0,0 +1,57 @@
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck %s
+
+; The body below uses the same floating-point constants several times. The
+; test only checks that llc emits code for @main.
+
+; CHECK-LABEL: {{^}}main:
+define amdgpu_ps void @main(float addrspace(6)* %arg1) #0 {
+main_body:
+  %tmp2 = load float, float addrspace(6)* %arg1
+  %tmp3 = call nsz float @llvm.floor.f32(float undef) #2
+  %tmp4 = fptosi float %tmp3 to i32
+  %tmp5 = add i32 0, %tmp4
+  %tmp6 = sub i32 0, %tmp5
+  %tmp7 = sitofp i32 %tmp4 to float
+  %tmp8 = fmul nsz float %tmp7, 0x3FEBB67AE0000000
+  %tmp9 = sitofp i32 %tmp6 to float
+  %tmp10 = call nsz float @llvm.fmuladd.f32(float %tmp9, float 0xBFEBB67AE0000000, float %tmp8) #2
+  %tmp11 = fsub nsz float 0.000000e+00, %tmp10
+  %tmp12 = call nsz float @llvm.fmuladd.f32(float %tmp11, float 5.000000e-01, float 5.000000e-01) #2
+  %tmp13 = call nsz float @llvm.fmuladd.f32(float 0.000000e+00, float %tmp2, float 5.000000e-01) #2
+  %tmp14 = call nsz float @llvm.floor.f32(float %tmp13) #2
+  %tmp15 = fptosi float %tmp14 to i32
+  %tmp16 = icmp eq i32 %tmp15, 0
+  br i1 %tmp16, label %endif06, label %if04
+
+if04:                                             ; preds = %main_body
+  %tmp17 = fadd nsz float %tmp12, -5.000000e-01
+  %tmp18 = fneg nsz float %tmp17
+  %tmp19 = fmul nsz float 0.000000e+00, %tmp18
+  %tmp20 = call nsz float @llvm.fmuladd.f32(float 0.000000e+00, float 0.000000e+00, float %tmp19) #2
+  %tmp21 = fadd nsz float %tmp20, 5.000000e-01
+  %tmp22 = fadd nsz float %tmp21, 0.000000e+00
+  %tmp23 = fmul nsz float %tmp22, 0.000000e+00
+  %tmp24 = call nsz <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp23, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0) #4
+  %tmp25 = extractelement <4 x float> %tmp24, i32 3
+  %tmp26 = fsub nsz float 1.000000e+00, %tmp25
+  %tmp27 = call nsz float @llvm.fmuladd.f32(float undef, float %tmp26, float undef) #2
+  unreachable
+
+endif06:                                          ; preds = %main_body
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare float @llvm.floor.f32(float) #3
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare float @llvm.fmuladd.f32(float, float, float) #3
+
+attributes #0 = { "no-signed-zeros-fp-math"="true" }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind readnone speculatable willreturn }
+attributes #4 = { convergent nounwind readnone }