Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1463,9 +1463,8 @@ auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation); unsigned SubByReg = MI.getOperand(1).getReg(); unsigned ZeroReg = Zero->getOperand(0).getReg(); - MachineInstr *SrcMI = MRI.getVRegDef(SubByReg); MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg}, - SrcMI->getFlags()); + MI.getFlags()); MI.eraseFromParent(); return Legalized; } Index: test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel %s -o - | FileCheck %s + +; Check flags are preserved for a regular instruction. +; CHECK-LABEL: name: fadd_nnan +; CHECK: nnan G_FADD +define amdgpu_kernel void @fadd_nnan(float %arg0, float %arg1) { + %res = fadd nnan float %arg0, %arg1 + store float %res, float addrspace(1)* undef + ret void +} + +; Check flags are preserved for a specially handled intrinsic +; CHECK-LABEL: name: fma_fast +; CHECK: nnan ninf nsz arcp contract afn reassoc G_FMA +define amdgpu_kernel void @fma_fast(float %arg0, float %arg1, float %arg2) { + %res = call fast float @llvm.fma.f32(float %arg0, float %arg1, float %arg2) + store float %res, float addrspace(1)* undef + ret void +} + +; Check flags are preserved for an arbitrary target intrinsic +; CHECK-LABEL: name: rcp_nsz +; CHECK: = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %8(s32) +define amdgpu_kernel void @rcp_nsz(float %arg0) { + %res = call nsz float @llvm.amdgcn.rcp.f32 (float %arg0) + store float %res, float addrspace(1)* undef + ret void +} + +declare float @llvm.fma.f32(float, float, float) +declare float 
@llvm.amdgcn.rcp.f32(float) Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir @@ -59,6 +59,36 @@ $vgpr0_vgpr1 = COPY %2 ... +--- +name: test_fsub_s64_fmf +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; SI-LABEL: name: test_fsub_s64_fmf + ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] + ; SI: %2:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]] + ; SI: $vgpr0_vgpr1 = COPY %2(s64) + ; VI-LABEL: name: test_fsub_s64_fmf + ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] + ; VI: %2:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]] + ; VI: $vgpr0_vgpr1 = COPY %2(s64) + ; GFX9-LABEL: name: test_fsub_s64_fmf + ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] + ; GFX9: %2:_(s64) = nnan nsz G_FADD [[COPY]], [[FNEG]] + ; GFX9: $vgpr0_vgpr1 = COPY %2(s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = nnan nsz G_FSUB %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + --- name: test_fsub_s16 body: | Index: unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp =================================================================== --- unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -729,4 +729,50 @@ EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; } + +// FNEG expansion in terms of FSUB +TEST_F(GISelMITest, LowerFNEG) { + if (!TM) + return; + + // Declare your legalization info + DefineLegalizerInfo(A, { + getActionDefinitionsBuilder(G_FSUB).legalFor({s64}); + }); + + // Build Instr. 
Make sure FMF are preserved. + auto FAdd = + B.buildInstr(TargetOpcode::G_FADD, {LLT::scalar(64)}, {Copies[0], Copies[1]}, + MachineInstr::MIFlag::FmNsz); + + // Should not propagate the flags of src instruction. + auto FNeg0 = + B.buildInstr(TargetOpcode::G_FNEG, {LLT::scalar(64)}, {FAdd.getReg(0)}, + {MachineInstr::MIFlag::FmArcp}); + + // Preserve the one flag. + auto FNeg1 = + B.buildInstr(TargetOpcode::G_FNEG, {LLT::scalar(64)}, {Copies[0]}, + MachineInstr::MIFlag::FmNoInfs); + + AInfo Info(MF->getSubtarget()); + DummyGISelObserver Observer; + LegalizerHelper Helper(*MF, Info, Observer, B); + // Perform Legalization + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.lower(*FNeg0, 0, LLT::scalar(64))); + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.lower(*FNeg1, 0, LLT::scalar(64))); + + auto CheckStr = R"( + CHECK: [[FADD:%[0-9]+]]:_(s64) = nsz G_FADD %0:_, %1:_ + CHECK: [[CONST0:%[0-9]+]]:_(s64) = G_FCONSTANT double -0.000000e+00 + CHECK: [[FSUB0:%[0-9]+]]:_(s64) = arcp G_FSUB [[CONST0]]:_, [[FADD]]:_ + CHECK: [[CONST1:%[0-9]+]]:_(s64) = G_FCONSTANT double -0.000000e+00 + CHECK: [[FSUB1:%[0-9]+]]:_(s64) = ninf G_FSUB [[CONST1]]:_, %0:_ + )"; + + // Check + EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; +} } // namespace