diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -39,6 +39,7 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsX86.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/Type.h"
@@ -1458,6 +1459,7 @@
   case Intrinsic::convert_from_fp16:
   case Intrinsic::convert_to_fp16:
   case Intrinsic::bitreverse:
+  case Intrinsic::amdgcn_fmul_legacy:
   case Intrinsic::x86_sse_cvtss2si:
   case Intrinsic::x86_sse_cvtss2si64:
   case Intrinsic::x86_sse_cvttss2si:
@@ -2082,6 +2084,16 @@
         return ConstantFP::get(Ty->getContext(), maximum(C1, C2));
       }
 
+      if (IntrinsicID == Intrinsic::amdgcn_fmul_legacy) {
+        const APFloat &C1 = Op1->getValueAPF();
+        const APFloat &C2 = Op2->getValueAPF();
+        // The legacy behaviour is that multiplying zero by anything, even NaN
+        // or infinity, gives +0.0.
+        if (C1.isZero() || C2.isZero())
+          return ConstantFP::getNullValue(Ty);
+        return ConstantFP::get(Ty->getContext(), C1 * C2);
+      }
+
       if (!TLI)
         return nullptr;
 
diff --git a/llvm/test/Transforms/ConstProp/amdgpu.ll b/llvm/test/Transforms/ConstProp/amdgpu.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ConstProp/amdgpu.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -constprop -S | FileCheck %s
+; REQUIRES: amdgpu-registered-target
+
+declare float @llvm.amdgcn.fmul.legacy(float, float)
+
+define void @test(float* %p) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    store volatile float 6.000000e+00, float* [[P:%.*]]
+; CHECK-NEXT:    store volatile float 0.000000e+00, float* [[P]]
+; CHECK-NEXT:    store volatile float 0.000000e+00, float* [[P]]
+; CHECK-NEXT:    store volatile float 0.000000e+00, float* [[P]]
+; CHECK-NEXT:    store volatile float 0.000000e+00, float* [[P]]
+; CHECK-NEXT:    store volatile float 0.000000e+00, float* [[P]]
+; CHECK-NEXT:    store volatile float 0.000000e+00, float* [[P]]
+; CHECK-NEXT:    store volatile float 0.000000e+00, float* [[P]]
+; CHECK-NEXT:    store volatile float 0.000000e+00, float* [[P]]
+; CHECK-NEXT:    store volatile float 0.000000e+00, float* [[P]]
+; CHECK-NEXT:    store volatile float 0.000000e+00, float* [[P]]
+; CHECK-NEXT:    ret void
+;
+  %a = call float @llvm.amdgcn.fmul.legacy(float +2.0, float +3.0)
+  store volatile float %a, float* %p
+  %b = call float @llvm.amdgcn.fmul.legacy(float +2.0, float +0.0)
+  store volatile float %b, float* %p
+  %c = call float @llvm.amdgcn.fmul.legacy(float +2.0, float -0.0)
+  store volatile float %c, float* %p
+  %d = call float @llvm.amdgcn.fmul.legacy(float +0.0, float +0.0)
+  store volatile float %d, float* %p
+  %e = call float @llvm.amdgcn.fmul.legacy(float +0.0, float -0.0)
+  store volatile float %e, float* %p
+  %f = call float @llvm.amdgcn.fmul.legacy(float -0.0, float +0.0)
+  store volatile float %f, float* %p
+  %g = call float @llvm.amdgcn.fmul.legacy(float -0.0, float -0.0)
+  store volatile float %g, float* %p
+  %h = call float @llvm.amdgcn.fmul.legacy(float +0.0, float 0x7ff0000000000000) ; +inf
+  store volatile float %h, float* %p
+  %i = call float @llvm.amdgcn.fmul.legacy(float 0xfff0000000000000, float +0.0) ; -inf
+  store volatile float %i, float* %p
+  %j = call float @llvm.amdgcn.fmul.legacy(float 0x7ff0001000000000, float -0.0) ; +nan
+  store volatile float %j, float* %p
+  %k = call float @llvm.amdgcn.fmul.legacy(float -0.0, float 0xfff0000100000000) ; -nan
+  store volatile float %k, float* %p
+  ret void
+}