Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1719,13 +1719,15 @@ LLT Ty = MRI.getType(MI.getOperand(0).getReg()); assert(Ty.isScalar()); + MachineFunction &MF = B.getMF(); + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + // TODO: Always legal with future ftz flag. - if (Ty == LLT::scalar(32) && !ST.hasFP32Denormals()) + if (Ty == LLT::scalar(32) && !MFI->getMode().FP32Denormals) return true; - if (Ty == LLT::scalar(16) && !ST.hasFP16Denormals()) + if (Ty == LLT::scalar(16) && !MFI->getMode().FP64FP16Denormals) return true; - MachineFunction &MF = B.getMF(); MachineIRBuilder HelperBuilder(MI); GISelObserverWrapper DummyObserver; @@ -1896,7 +1898,8 @@ if (!MF.getTarget().Options.UnsafeFPMath && ResTy == S64) return false; - if (!Unsafe && ResTy == S32 && ST.hasFP32Denormals()) + if (!Unsafe && ResTy == S32 && + MF.getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals) return false; if (auto CLHS = getConstantFPVRegVal(LHS, MRI)) { @@ -1972,15 +1975,16 @@ // Enable or disable FP32 denorm mode. When 'Enable' is true, emit instructions // to enable denorm mode. When 'Enable' is false, disable denorm mode. static void toggleSPDenormMode(bool Enable, + MachineIRBuilder &B, const GCNSubtarget &ST, - MachineIRBuilder &B) { + AMDGPU::SIModeRegisterDefaults Mode) { // Set SP denorm mode to this value. unsigned SPDenormMode = Enable ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT; if (ST.hasDenormModeInst()) { // Preserve default FP64FP16 denorm mode while updating FP32 mode. - unsigned DPDenormModeDefault = ST.hasFP64Denormals() + unsigned DPDenormModeDefault = Mode.FP64FP16Denormals ? 
FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT; @@ -2007,6 +2011,8 @@ Register Res = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); + const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>(); + AMDGPU::SIModeRegisterDefaults Mode = MFI->getMode(); uint16_t Flags = MI.getFlags(); @@ -2035,8 +2041,8 @@ // FIXME: Doesn't correctly model the FP mode switch, and the FP operations // aren't modeled as reading it. - if (!ST.hasFP32Denormals()) - toggleSPDenormMode(true, ST, B); + if (!Mode.FP32Denormals) + toggleSPDenormMode(true, B, ST, Mode); auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags); auto Fma1 = B.buildFMA(S32, Fma0, ApproxRcp, ApproxRcp, Flags); @@ -2045,8 +2051,8 @@ auto Fma3 = B.buildFMA(S32, Fma2, Fma1, Mul, Flags); auto Fma4 = B.buildFMA(S32, NegDivScale0, Fma3, NumeratorScaled, Flags); - if (!ST.hasFP32Denormals()) - toggleSPDenormMode(false, ST, B); + if (!Mode.FP32Denormals) + toggleSPDenormMode(false, B, ST, Mode); auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32}, false) .addUse(Fma4.getReg(0)) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir @@ -7,6 +7,11 @@ --- name: test_fdiv_s16 +machineFunctionInfo: + mode: + fp32-denormals: true + fp64-fp16-denormals: true + body: | bb.0: liveins: $vgpr0, $vgpr1 @@ -23,14 +28,12 @@ ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: S_SETREG_IMM32_B32 3, 2305 ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] ; SI: 
[[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: S_SETREG_IMM32_B32 0, 2305 ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) @@ -94,12 +97,108 @@ ... --- -name: test_fdiv_s32 +name: test_fdiv_s32_denorms_on +machineFunctionInfo: + mode: + fp32-denormals: true + fp64-fp16-denormals: true + body: | bb.0: liveins: $vgpr0, $vgpr1 - ; SI-LABEL: name: test_fdiv_s32 + ; SI-LABEL: name: test_fdiv_s32_denorms_on + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY1]](s32), [[COPY1]](s32), [[COPY]](s32) + ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32) + ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), 
[[FMA3]](s32), [[INT3]](s1) + ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; SI: $vgpr0 = COPY [[INT6]](s32) + ; VI-LABEL: name: test_fdiv_s32_denorms_on + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY1]](s32), [[COPY1]](s32), [[COPY]](s32) + ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32) + ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; VI: $vgpr0 = COPY [[INT6]](s32) + ; GFX9-LABEL: name: test_fdiv_s32_denorms_on + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY1]](s32), [[COPY1]](s32), [[COPY]](s32) + ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), 
[[COPY]](s32), [[COPY1]](s32), [[COPY]](s32) + ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; GFX9: $vgpr0 = COPY [[INT6]](s32) + ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_denorms_on + ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) + ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[INT]] + ; GFX9-UNSAFE: $vgpr0 = COPY [[FMUL]](s32) + ; GFX10-LABEL: name: test_fdiv_s32_denorms_on + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY1]](s32), [[COPY1]](s32), [[COPY]](s32) + ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32) + ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], 
[[INT4]], [[C]] + ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; GFX10: $vgpr0 = COPY [[INT6]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_FDIV %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: test_fdiv_s32_denorms_off +machineFunctionInfo: + mode: + fp32-denormals: false + fp64-fp16-denormals: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; SI-LABEL: name: test_fdiv_s32_denorms_off ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 @@ -118,7 +217,7 @@ ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) ; SI: $vgpr0 = COPY [[INT6]](s32) - ; VI-LABEL: name: test_fdiv_s32 + ; VI-LABEL: name: test_fdiv_s32_denorms_off ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 @@ -137,7 +236,7 @@ ; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) ; VI: $vgpr0 = COPY [[INT6]](s32) 
- ; GFX9-LABEL: name: test_fdiv_s32 + ; GFX9-LABEL: name: test_fdiv_s32_denorms_off ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 @@ -156,13 +255,13 @@ ; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) ; GFX9: $vgpr0 = COPY [[INT6]](s32) - ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32 + ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_denorms_off ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[INT]] ; GFX9-UNSAFE: $vgpr0 = COPY [[FMUL]](s32) - ; GFX10-LABEL: name: test_fdiv_s32 + ; GFX10-LABEL: name: test_fdiv_s32_denorms_off ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 @@ -188,36 +287,41 @@ ... 
--- -name: test_fdiv_s32_arcp +name: test_fdiv_s32_denorms_off_arcp +machineFunctionInfo: + mode: + fp32-denormals: false + fp64-fp16-denormals: true + body: | bb.0: liveins: $vgpr0, $vgpr1 - ; SI-LABEL: name: test_fdiv_s32_arcp + ; SI-LABEL: name: test_fdiv_s32_denorms_off_arcp ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) ; SI: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]] ; SI: $vgpr0 = COPY [[FMUL]](s32) - ; VI-LABEL: name: test_fdiv_s32_arcp + ; VI-LABEL: name: test_fdiv_s32_denorms_off_arcp ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) ; VI: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]] ; VI: $vgpr0 = COPY [[FMUL]](s32) - ; GFX9-LABEL: name: test_fdiv_s32_arcp + ; GFX9-LABEL: name: test_fdiv_s32_denorms_off_arcp ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]] ; GFX9: $vgpr0 = COPY [[FMUL]](s32) - ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_arcp + ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_denorms_off_arcp ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]] ; GFX9-UNSAFE: $vgpr0 = COPY [[FMUL]](s32) - ; GFX10-LABEL: name: test_fdiv_s32_arcp + ; GFX10-LABEL: name: test_fdiv_s32_denorms_off_arcp ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC 
intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) @@ -231,6 +335,11 @@ --- name: test_fdiv_s64 +machineFunctionInfo: + mode: + fp32-denormals: false + fp64-fp16-denormals: true + body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -269,6 +378,11 @@ --- name: test_fdiv_v2s32 +machineFunctionInfo: + mode: + fp32-denormals: false + fp64-fp16-denormals: true + body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -450,28 +564,24 @@ ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), [[UV]](s32) ; SI: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) ; SI: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] - ; SI: S_SETREG_IMM32_B32 3, 2305 ; SI: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] ; SI: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] ; SI: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] ; SI: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] ; SI: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] ; SI: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: S_SETREG_IMM32_B32 0, 2305 ; SI: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) ; SI: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) ; SI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV3]](s32), [[UV3]](s32), [[UV1]](s32) ; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), [[UV1]](s32) ; SI: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) ; SI: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] - ; SI: S_SETREG_IMM32_B32 3, 2305 ; SI: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA 
[[FNEG1]], [[INT11]], [[C]] ; SI: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] ; SI: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] ; SI: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] ; SI: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] ; SI: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI: S_SETREG_IMM32_B32 0, 2305 ; SI: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) ; SI: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) @@ -486,28 +596,24 @@ ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), [[UV]](s32) ; VI: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) ; VI: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] - ; VI: S_SETREG_IMM32_B32 3, 2305 ; VI: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] ; VI: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] ; VI: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] ; VI: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] ; VI: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] ; VI: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI: S_SETREG_IMM32_B32 0, 2305 ; VI: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) ; VI: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) ; VI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV3]](s32), [[UV3]](s32), [[UV1]](s32) ; 
VI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), [[UV1]](s32) ; VI: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) ; VI: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] - ; VI: S_SETREG_IMM32_B32 3, 2305 ; VI: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] ; VI: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] ; VI: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] ; VI: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] ; VI: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] ; VI: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; VI: S_SETREG_IMM32_B32 0, 2305 ; VI: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) ; VI: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) @@ -522,28 +628,24 @@ ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), [[UV]](s32) ; GFX9: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] - ; GFX9: S_SETREG_IMM32_B32 3, 2305 ; GFX9: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9: S_SETREG_IMM32_B32 0, 2305 ; GFX9: 
[[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) ; GFX9: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) ; GFX9: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV3]](s32), [[UV3]](s32), [[UV1]](s32) ; GFX9: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), [[UV1]](s32) ; GFX9: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) ; GFX9: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] - ; GFX9: S_SETREG_IMM32_B32 3, 2305 ; GFX9: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] ; GFX9: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] ; GFX9: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] ; GFX9: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] ; GFX9: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX9: S_SETREG_IMM32_B32 0, 2305 ; GFX9: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) ; GFX9: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) @@ -569,28 +671,24 @@ ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), [[UV]](s32) ; GFX10: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] - ; GFX10: S_DENORM_MODE 15 ; GFX10: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA 
[[FNEG]], [[INT4]], [[C]] ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10: S_DENORM_MODE 12 ; GFX10: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) ; GFX10: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) ; GFX10: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV3]](s32), [[UV3]](s32), [[UV1]](s32) ; GFX10: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), [[UV1]](s32) ; GFX10: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) ; GFX10: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] - ; GFX10: S_DENORM_MODE 15 ; GFX10: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] ; GFX10: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] ; GFX10: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] ; GFX10: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] ; GFX10: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX10: S_DENORM_MODE 12 ; GFX10: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) ; GFX10: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR 
[[INT6]](s32), [[INT13]](s32) @@ -617,42 +715,36 @@ ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), [[UV]](s32) ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: S_SETREG_IMM32_B32 3, 2305 ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: S_SETREG_IMM32_B32 0, 2305 ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) ; SI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV4]](s32), [[UV4]](s32), [[UV1]](s32) ; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), [[UV1]](s32) ; SI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI: S_SETREG_IMM32_B32 3, 2305 ; SI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] ; SI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] ; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] ; SI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] ; SI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] ; SI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI: S_SETREG_IMM32_B32 0, 2305 ; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) ; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) ; SI: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV5]](s32), [[UV5]](s32), [[UV2]](s32) ; SI: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), [[UV2]](s32) ; SI: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) ; SI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; SI: S_SETREG_IMM32_B32 3, 2305 ; SI: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] ; SI: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] ; SI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] ; SI: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] ; SI: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] ; SI: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; SI: S_SETREG_IMM32_B32 0, 2305 ; SI: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) ; SI: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) @@ -667,42 +759,36 @@ ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), [[UV]](s32) ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; VI: S_SETREG_IMM32_B32 3, 2305 ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL 
[[INT2]], [[FMA1]] ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] ; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] ; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI: S_SETREG_IMM32_B32 0, 2305 ; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) ; VI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV4]](s32), [[UV4]](s32), [[UV1]](s32) ; VI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), [[UV1]](s32) ; VI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) ; VI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; VI: S_SETREG_IMM32_B32 3, 2305 ; VI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] ; VI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] ; VI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] ; VI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] ; VI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] ; VI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; VI: S_SETREG_IMM32_B32 0, 2305 ; VI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) ; VI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) ; VI: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV5]](s32), [[UV5]](s32), [[UV2]](s32) ; VI: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), [[UV2]](s32) ; VI: [[INT18:%[0-9]+]]:_(s32) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) ; VI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; VI: S_SETREG_IMM32_B32 3, 2305 ; VI: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] ; VI: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] ; VI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] ; VI: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] ; VI: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] ; VI: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; VI: S_SETREG_IMM32_B32 0, 2305 ; VI: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) ; VI: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) @@ -717,42 +803,36 @@ ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), [[UV]](s32) ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX9: S_SETREG_IMM32_B32 3, 2305 ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9: S_SETREG_IMM32_B32 0, 2305 ; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) ; 
GFX9: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV4]](s32), [[UV4]](s32), [[UV1]](s32) ; GFX9: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), [[UV1]](s32) ; GFX9: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) ; GFX9: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; GFX9: S_SETREG_IMM32_B32 3, 2305 ; GFX9: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] ; GFX9: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] ; GFX9: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] ; GFX9: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] ; GFX9: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX9: S_SETREG_IMM32_B32 0, 2305 ; GFX9: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) ; GFX9: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) ; GFX9: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV5]](s32), [[UV5]](s32), [[UV2]](s32) ; GFX9: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), [[UV2]](s32) ; GFX9: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) ; GFX9: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; GFX9: S_SETREG_IMM32_B32 3, 2305 ; GFX9: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] ; GFX9: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] ; GFX9: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] ; GFX9: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], 
[[FMUL2]] ; GFX9: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; GFX9: S_SETREG_IMM32_B32 0, 2305 ; GFX9: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) ; GFX9: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) @@ -780,42 +860,36 @@ ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), [[UV]](s32) ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX10: S_DENORM_MODE 15 ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10: S_DENORM_MODE 12 ; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) ; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) ; GFX10: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV4]](s32), [[UV4]](s32), [[UV1]](s32) ; GFX10: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), [[UV1]](s32) ; GFX10: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) ; GFX10: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; GFX10: S_DENORM_MODE 15 ; 
GFX10: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] ; GFX10: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] ; GFX10: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] ; GFX10: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] ; GFX10: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX10: S_DENORM_MODE 12 ; GFX10: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) ; GFX10: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) ; GFX10: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV5]](s32), [[UV5]](s32), [[UV2]](s32) ; GFX10: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), [[UV2]](s32) ; GFX10: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) ; GFX10: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; GFX10: S_DENORM_MODE 15 ; GFX10: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] ; GFX10: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] ; GFX10: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] ; GFX10: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] ; GFX10: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; GFX10: S_DENORM_MODE 12 ; GFX10: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) ; GFX10: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), 
[[INT13]](s32), [[INT20]](s32) @@ -910,14 +984,12 @@ ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: S_SETREG_IMM32_B32 3, 2305 ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: S_SETREG_IMM32_B32 0, 2305 ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) @@ -927,14 +999,12 @@ ; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) ; SI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI: S_SETREG_IMM32_B32 3, 2305 ; SI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] ; SI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] ; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] ; SI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] ; SI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] ; SI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI: S_SETREG_IMM32_B32 0, 2305 ; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), 
[[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) ; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) @@ -1081,14 +1151,12 @@ ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: S_SETREG_IMM32_B32 3, 2305 ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: S_SETREG_IMM32_B32 0, 2305 ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) @@ -1098,14 +1166,12 @@ ; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) ; SI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI: S_SETREG_IMM32_B32 3, 2305 ; SI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] ; SI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] ; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] ; SI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] ; SI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], 
[[FMUL1]] ; SI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI: S_SETREG_IMM32_B32 0, 2305 ; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) ; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) @@ -1115,14 +1181,12 @@ ; SI: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32) ; SI: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) ; SI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; SI: S_SETREG_IMM32_B32 3, 2305 ; SI: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C1]] ; SI: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] ; SI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] ; SI: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] ; SI: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] ; SI: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; SI: S_SETREG_IMM32_B32 0, 2305 ; SI: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) ; SI: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32) ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT20]](s32) @@ -1361,14 +1425,12 @@ ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: S_SETREG_IMM32_B32 3, 2305 ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] ; SI: 
[[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: S_SETREG_IMM32_B32 0, 2305 ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) @@ -1378,14 +1440,12 @@ ; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) ; SI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI: S_SETREG_IMM32_B32 3, 2305 ; SI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] ; SI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] ; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] ; SI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] ; SI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] ; SI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI: S_SETREG_IMM32_B32 0, 2305 ; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) ; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) @@ -1395,14 +1455,12 @@ ; SI: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32) ; SI: [[INT18:%[0-9]+]]:_(s32) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) ; SI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; SI: S_SETREG_IMM32_B32 3, 2305 ; SI: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C1]] ; SI: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] ; SI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] ; SI: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] ; SI: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] ; SI: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; SI: S_SETREG_IMM32_B32 0, 2305 ; SI: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) ; SI: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32) ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT20]](s32) @@ -1412,14 +1470,12 @@ ; SI: [[INT23:%[0-9]+]]:_(s32), [[INT24:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT6]](s32), [[FPEXT7]](s32), [[FPEXT6]](s32) ; SI: [[INT25:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT21]](s32) ; SI: [[FNEG3:%[0-9]+]]:_(s32) = G_FNEG [[INT21]] - ; SI: S_SETREG_IMM32_B32 3, 2305 ; SI: [[FMA15:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[INT25]], [[C1]] ; SI: [[FMA16:%[0-9]+]]:_(s32) = G_FMA [[FMA15]], [[INT25]], [[INT25]] ; SI: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT23]], [[FMA16]] ; SI: [[FMA17:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[FMUL3]], [[INT23]] ; SI: [[FMA18:%[0-9]+]]:_(s32) = G_FMA [[FMA17]], [[FMA16]], [[FMUL3]] ; SI: [[FMA19:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[FMA18]], [[INT23]] - ; SI: S_SETREG_IMM32_B32 0, 2305 ; SI: [[INT26:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA19]](s32), [[FMA16]](s32), [[FMA18]](s32), [[INT24]](s1) ; SI: [[INT27:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT26]](s32), [[FPEXT7]](s32), [[FPEXT6]](s32) ; SI: 
[[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT27]](s32) @@ -1634,14 +1690,12 @@ ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: S_SETREG_IMM32_B32 3, 2305 ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: S_SETREG_IMM32_B32 0, 2305 ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) @@ -1696,14 +1750,12 @@ ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: S_SETREG_IMM32_B32 3, 2305 ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: S_SETREG_IMM32_B32 0, 2305 ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), 
[[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) @@ -1752,25 +1804,73 @@ liveins: $vgpr0 ; SI-LABEL: name: test_fdiv_s32_constant_one_rcp + ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32) - ; SI: $vgpr0 = COPY [[INT]](s32) + ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY]](s32), [[C]](s32) + ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), [[C]](s32) + ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; SI: $vgpr0 = COPY [[INT6]](s32) ; VI-LABEL: name: test_fdiv_s32_constant_one_rcp + ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32) - ; VI: $vgpr0 = COPY [[INT]](s32) + ; VI: [[INT:%[0-9]+]]:_(s32), 
[[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY]](s32), [[C]](s32) + ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), [[C]](s32) + ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; VI: $vgpr0 = COPY [[INT6]](s32) ; GFX9-LABEL: name: test_fdiv_s32_constant_one_rcp + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32) - ; GFX9: $vgpr0 = COPY [[INT]](s32) + ; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY]](s32), [[C]](s32) + ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), [[C]](s32) + ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], 
[[FMA1]] + ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; GFX9: $vgpr0 = COPY [[INT6]](s32) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_constant_one_rcp ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32) ; GFX9-UNSAFE: $vgpr0 = COPY [[INT]](s32) ; GFX10-LABEL: name: test_fdiv_s32_constant_one_rcp + ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32) - ; GFX10: $vgpr0 = COPY [[INT]](s32) + ; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY]](s32), [[C]](s32) + ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), [[C]](s32) + ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; GFX10: $vgpr0 = COPY [[INT6]](s32) %0:_(s32) = G_FCONSTANT float 1.0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_FDIV %0, %1 @@ -1784,30 +1884,78 @@ liveins: $vgpr0 ; SI-LABEL: name: test_fdiv_s32_constant_negative_one_rcp + ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; SI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32) - ; SI: $vgpr0 = COPY [[INT]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY]](s32), [[C]](s32) + ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), [[C]](s32) + ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; SI: $vgpr0 = COPY [[INT6]](s32) ; VI-LABEL: name: test_fdiv_s32_constant_negative_one_rcp + ; VI: [[C:%[0-9]+]]:_(s32) = 
G_FCONSTANT float -1.000000e+00 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32) - ; VI: $vgpr0 = COPY [[INT]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY]](s32), [[C]](s32) + ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), [[C]](s32) + ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; VI: $vgpr0 = COPY [[INT6]](s32) ; GFX9-LABEL: name: test_fdiv_s32_constant_negative_one_rcp + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32) - ; GFX9: $vgpr0 = COPY [[INT]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY]](s32), [[C]](s32) + ; 
GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), [[C]](s32) + ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; GFX9: $vgpr0 = COPY [[INT6]](s32) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_constant_negative_one_rcp ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32) ; GFX9-UNSAFE: $vgpr0 = COPY [[INT]](s32) ; GFX10-LABEL: name: test_fdiv_s32_constant_negative_one_rcp + ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32) - ; GFX10: $vgpr0 = COPY [[INT]](s32) + ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY]](s32), [[C]](s32) + ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), 
[[C]](s32), [[COPY]](s32), [[C]](s32) + ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; GFX10: $vgpr0 = COPY [[INT6]](s32) %0:_(s32) = G_FCONSTANT float -1.0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_FDIV %0, %1 @@ -1816,6 +1964,11 @@ --- name: test_fdiv_s64_constant_one_rcp +machineFunctionInfo: + mode: + fp32-denormals: false + fp64-fp16-denormals: true + body: | bb.0: liveins: $vgpr0_vgpr1 @@ -1852,6 +2005,11 @@ --- name: test_fdiv_s64_constant_negative_one_rcp +machineFunctionInfo: + mode: + fp32-denormals: false + fp64-fp16-denormals: true + body: | bb.0: liveins: $vgpr0_vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.mir +++ /dev/null @@ -1,181 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -mattr=+fp32-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=F32DENORM %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -mattr=-fp32-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=F32FLUSH %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d 
-mcpu=fiji -mattr=+fp32-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=F32DENORM %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -mattr=-fp32-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=F32FLUSH %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -mattr=+fp32-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=F32DENORM %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -mattr=-fp32-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=F32FLUSH %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx10 -mattr=+fp32-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=F32DENORM %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx10 -mattr=-fp32-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=F32FLUSH %s - ---- -name: test_fmad_s32 -body: | - bb.0: - liveins: $vgpr0, $vgpr1, $vgpr2 - - ; F32DENORM-LABEL: name: test_fmad_s32 - ; F32DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; F32DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; F32DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; F32DENORM: $vgpr0 = COPY %3(s32) - ; F32DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] - ; F32DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] - ; F32FLUSH-LABEL: name: test_fmad_s32 - ; F32FLUSH: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; F32FLUSH: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; F32FLUSH: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; F32FLUSH: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]] - ; F32FLUSH: $vgpr0 = COPY [[FMAD]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = G_FMAD %0, %1, %2 - $vgpr0 = COPY %3 -... 
- ---- -name: test_fmad_s32_flags -body: | - bb.0: - liveins: $vgpr0, $vgpr1, $vgpr2 - - ; F32DENORM-LABEL: name: test_fmad_s32_flags - ; F32DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; F32DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; F32DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; F32DENORM: $vgpr0 = COPY %3(s32) - ; F32DENORM: %4:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]] - ; F32DENORM: %3:_(s32) = nnan G_FADD %4, [[COPY2]] - ; F32FLUSH-LABEL: name: test_fmad_s32_flags - ; F32FLUSH: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; F32FLUSH: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; F32FLUSH: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; F32FLUSH: %3:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]] - ; F32FLUSH: $vgpr0 = COPY %3(s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = COPY $vgpr2 - %3:_(s32) = nnan G_FMAD %0, %1, %2 - $vgpr0 = COPY %3 -... - ---- -name: test_fmad_v2s32 -body: | - bb.0: - liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 - - ; F32DENORM-LABEL: name: test_fmad_v2s32 - ; F32DENORM: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; F32DENORM: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; F32DENORM: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; F32DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; F32DENORM: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; F32DENORM: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; F32DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32) - ; F32DENORM: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; F32DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] - ; F32DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV5]] - ; F32DENORM: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] - ; F32DENORM: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV4]] - ; F32FLUSH-LABEL: name: test_fmad_v2s32 - ; 
F32FLUSH: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; F32FLUSH: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; F32FLUSH: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; F32FLUSH: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; F32FLUSH: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; F32FLUSH: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; F32FLUSH: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]] - ; F32FLUSH: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]] - ; F32FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32) - ; F32FLUSH: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %3:_(<2 x s32>) = G_FMAD %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 -... - ---- -name: test_fmad_v3s32 -body: | - bb.0: - liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 - - ; F32DENORM-LABEL: name: test_fmad_v3s32 - ; F32DENORM: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; F32DENORM: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; F32DENORM: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; F32DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; F32DENORM: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; F32DENORM: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; F32DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR %13(s32), %14(s32), %15(s32) - ; F32DENORM: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - ; F32DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] - ; F32DENORM: 
[[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] - ; F32DENORM: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] - ; F32DENORM: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]] - ; F32DENORM: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] - ; F32DENORM: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV6]] - ; F32FLUSH-LABEL: name: test_fmad_v3s32 - ; F32FLUSH: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; F32FLUSH: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; F32FLUSH: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; F32FLUSH: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; F32FLUSH: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; F32FLUSH: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; F32FLUSH: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]] - ; F32FLUSH: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]] - ; F32FLUSH: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]] - ; F32FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32) - ; F32FLUSH: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) - %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - %3:_(<3 x s32>) = G_FMAD %0, %1, %2 - $vgpr0_vgpr1_vgpr2 = COPY %3 -... 
- ---- -name: test_fmad_v4s32 -body: | - bb.0: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 - - ; F32DENORM-LABEL: name: test_fmad_v4s32 - ; F32DENORM: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; F32DENORM: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; F32DENORM: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; F32DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; F32DENORM: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; F32DENORM: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; F32DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %16(s32), %17(s32), %18(s32), %19(s32) - ; F32DENORM: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - ; F32DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] - ; F32DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV11]] - ; F32DENORM: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]] - ; F32DENORM: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV10]] - ; F32DENORM: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] - ; F32DENORM: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV9]] - ; F32DENORM: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] - ; F32DENORM: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV8]] - ; F32FLUSH-LABEL: name: test_fmad_v4s32 - ; F32FLUSH: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; F32FLUSH: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; F32FLUSH: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; F32FLUSH: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x 
s32>) - ; F32FLUSH: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; F32FLUSH: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; F32FLUSH: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV4]], [[UV8]] - ; F32FLUSH: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]] - ; F32FLUSH: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]] - ; F32FLUSH: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]] - ; F32FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32) - ; F32FLUSH: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) - %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - %3:_(<4 x s32>) = G_FMAD %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 -... 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir @@ -1,75 +1,46 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -mattr=+fp64-fp16-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI-F16DENORM %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -mattr=-fp64-fp16-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI-F16FLUSH %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -mattr=+fp64-fp16-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI-F16DENORM %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -mattr=-fp64-fp16-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI-F16FLUSH %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -mattr=+fp64-fp16-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI-F16DENORM %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -mattr=-fp64-fp16-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI-F16FLUSH %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+fp64-fp16-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX10 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=-fp64-fp16-denormals -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX10 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX7 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX10 %s --- -name: test_fmad_s16 +name: test_fmad_s16_flush +machineFunctionInfo: + mode: + fp64-fp16-denormals: false + 
body: | bb.0: liveins: $vgpr0, $vgpr1, $vgpr2 - ; SI-F16DENORM-LABEL: name: test_fmad_s16 - ; SI-F16DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-F16DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-F16DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-F16DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-F16DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI-F16DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; SI-F16DENORM: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-F16DENORM: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-F16DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] - ; SI-F16DENORM: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; SI-F16DENORM: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; SI-F16DENORM: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-F16DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] - ; SI-F16DENORM: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI-F16DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI-F16DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; SI-F16FLUSH-LABEL: name: test_fmad_s16 - ; SI-F16FLUSH: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-F16FLUSH: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-F16FLUSH: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI-F16FLUSH: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-F16FLUSH: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI-F16FLUSH: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; SI-F16FLUSH: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-F16FLUSH: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-F16FLUSH: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] - ; SI-F16FLUSH: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; SI-F16FLUSH: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; SI-F16FLUSH: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; 
SI-F16FLUSH: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] - ; SI-F16FLUSH: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI-F16FLUSH: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI-F16FLUSH: $vgpr0 = COPY [[ANYEXT]](s32) - ; VI-F16DENORM-LABEL: name: test_fmad_s16 - ; VI-F16DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-F16DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-F16DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-F16DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-F16DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-F16DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-F16DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %6(s16) - ; VI-F16DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; VI-F16DENORM: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] - ; VI-F16DENORM: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] - ; VI-F16FLUSH-LABEL: name: test_fmad_s16 - ; VI-F16FLUSH: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI-F16FLUSH: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-F16FLUSH: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI-F16FLUSH: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI-F16FLUSH: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI-F16FLUSH: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI-F16FLUSH: [[FMAD:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; VI-F16FLUSH: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAD]](s16) - ; VI-F16FLUSH: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-LABEL: name: test_fmad_s16 + ; GFX6-LABEL: name: test_fmad_s16_flush + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT 
[[TRUNC]](s16) + ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) + ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX7-LABEL: name: test_fmad_s16_flush + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX7: [[FMAD:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX7: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAD]](s16) + ; GFX7: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-LABEL: name: test_fmad_s16_flush ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -86,133 +57,78 @@ %3:_(s16) = G_TRUNC %0 %4:_(s16) = G_TRUNC %1 %5:_(s16) = G_TRUNC %2 - %6:_(s16) = G_FMAD %3, %4, %5 %7:_(s32) = G_ANYEXT %6 $vgpr0 = COPY %7 ... 
--- -name: test_fmad_v2s16 +name: test_fmad_v2s16_flush +machineFunctionInfo: + mode: + fp64-fp16-denormals: false + body: | bb.0: liveins: $vgpr0, $vgpr1, $vgpr2 - ; SI-F16DENORM-LABEL: name: test_fmad_v2s16 - ; SI-F16DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-F16DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-F16DENORM: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI-F16DENORM: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-F16DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-F16DENORM: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-F16DENORM: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-F16DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-F16DENORM: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-F16DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-F16DENORM: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-F16DENORM: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-F16DENORM: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; SI-F16DENORM: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-F16DENORM: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-F16DENORM: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI-F16DENORM: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-F16DENORM: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-F16DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] - ; SI-F16DENORM: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; SI-F16DENORM: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; SI-F16DENORM: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI-F16DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] - ; SI-F16DENORM: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI-F16DENORM: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-F16DENORM: 
[[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-F16DENORM: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]] - ; SI-F16DENORM: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; SI-F16DENORM: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) - ; SI-F16DENORM: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI-F16DENORM: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] - ; SI-F16DENORM: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; SI-F16DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) - ; SI-F16DENORM: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; SI-F16FLUSH-LABEL: name: test_fmad_v2s16 - ; SI-F16FLUSH: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-F16FLUSH: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI-F16FLUSH: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI-F16FLUSH: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-F16FLUSH: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-F16FLUSH: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-F16FLUSH: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-F16FLUSH: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-F16FLUSH: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-F16FLUSH: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-F16FLUSH: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-F16FLUSH: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-F16FLUSH: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; SI-F16FLUSH: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-F16FLUSH: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-F16FLUSH: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI-F16FLUSH: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-F16FLUSH: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-F16FLUSH: [[FMUL:%[0-9]+]]:_(s32) = 
G_FMUL [[FPEXT]], [[FPEXT1]] - ; SI-F16FLUSH: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; SI-F16FLUSH: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; SI-F16FLUSH: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI-F16FLUSH: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] - ; SI-F16FLUSH: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI-F16FLUSH: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-F16FLUSH: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-F16FLUSH: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]] - ; SI-F16FLUSH: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; SI-F16FLUSH: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) - ; SI-F16FLUSH: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI-F16FLUSH: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] - ; SI-F16FLUSH: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; SI-F16FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) - ; SI-F16FLUSH: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; VI-F16DENORM-LABEL: name: test_fmad_v2s16 - ; VI-F16DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-F16DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-F16DENORM: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI-F16DENORM: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-F16DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-F16DENORM: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-F16DENORM: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-F16DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-F16DENORM: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-F16DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-F16DENORM: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-F16DENORM: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) 
- ; VI-F16DENORM: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; VI-F16DENORM: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-F16DENORM: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-F16DENORM: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-F16DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %10(s16), %11(s16) - ; VI-F16DENORM: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; VI-F16DENORM: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC3]] - ; VI-F16DENORM: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC5]] - ; VI-F16DENORM: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC2]] - ; VI-F16DENORM: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[FMUL1]], [[TRUNC4]] - ; VI-F16FLUSH-LABEL: name: test_fmad_v2s16 - ; VI-F16FLUSH: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI-F16FLUSH: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI-F16FLUSH: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI-F16FLUSH: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI-F16FLUSH: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-F16FLUSH: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-F16FLUSH: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-F16FLUSH: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-F16FLUSH: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI-F16FLUSH: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-F16FLUSH: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-F16FLUSH: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-F16FLUSH: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; VI-F16FLUSH: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-F16FLUSH: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-F16FLUSH: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-F16FLUSH: [[FMAD:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC]], [[TRUNC2]], [[TRUNC4]] - ; 
VI-F16FLUSH: [[FMAD1:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC1]], [[TRUNC3]], [[TRUNC5]] - ; VI-F16FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMAD]](s16), [[FMAD1]](s16) - ; VI-F16FLUSH: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) - ; GFX10-LABEL: name: test_fmad_v2s16 + ; GFX6-LABEL: name: test_fmad_v2s16_flush + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX6: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) + ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; GFX6: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = 
G_FMUL [[FPEXT4]], [[FPEXT5]] + ; GFX6: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX6: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) + ; GFX6: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] + ; GFX6: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) + ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX7-LABEL: name: test_fmad_v2s16_flush + ; GFX7: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX7: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX7: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX7: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX7: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX7: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX7: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX7: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX7: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX7: [[FMAD:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC]], [[TRUNC2]], [[TRUNC4]] + ; GFX7: [[FMAD1:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC1]], [[TRUNC3]], [[TRUNC5]] + ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMAD]](s16), [[FMAD1]](s16) + ; GFX7: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX10-LABEL: name: test_fmad_v2s16_flush ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = 
COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 @@ -243,232 +159,124 @@ ... --- -name: test_fmad_v4s16 +name: test_fmad_v4s16_flush +machineFunctionInfo: + mode: + fp64-fp16-denormals: false + body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 - ; SI-F16DENORM-LABEL: name: test_fmad_v4s16 - ; SI-F16DENORM: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-F16DENORM: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-F16DENORM: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; SI-F16DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-F16DENORM: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-F16DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-F16DENORM: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-F16DENORM: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-F16DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-F16DENORM: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-F16DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-F16DENORM: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-F16DENORM: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-F16DENORM: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-F16DENORM: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-F16DENORM: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-F16DENORM: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-F16DENORM: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI-F16DENORM: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-F16DENORM: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI-F16DENORM: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI-F16DENORM: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; 
SI-F16DENORM: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; SI-F16DENORM: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI-F16DENORM: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI-F16DENORM: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-F16DENORM: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; SI-F16DENORM: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; SI-F16DENORM: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; SI-F16DENORM: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; SI-F16DENORM: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI-F16DENORM: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-F16DENORM: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI-F16DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] - ; SI-F16DENORM: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; SI-F16DENORM: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; SI-F16DENORM: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) - ; SI-F16DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] - ; SI-F16DENORM: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI-F16DENORM: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-F16DENORM: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI-F16DENORM: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]] - ; SI-F16DENORM: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; SI-F16DENORM: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) - ; SI-F16DENORM: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) - ; SI-F16DENORM: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] - ; SI-F16DENORM: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; SI-F16DENORM: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-F16DENORM: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; 
SI-F16DENORM: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT8]], [[FPEXT9]] - ; SI-F16DENORM: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; SI-F16DENORM: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC4]](s16) - ; SI-F16DENORM: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC10]](s16) - ; SI-F16DENORM: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT10]], [[FPEXT11]] - ; SI-F16DENORM: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; SI-F16DENORM: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-F16DENORM: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI-F16DENORM: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT12]], [[FPEXT13]] - ; SI-F16DENORM: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) - ; SI-F16DENORM: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) - ; SI-F16DENORM: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) - ; SI-F16DENORM: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT14]], [[FPEXT15]] - ; SI-F16DENORM: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; SI-F16DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) - ; SI-F16DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC5]](s16), [[FPTRUNC7]](s16) - ; SI-F16DENORM: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; SI-F16DENORM: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; SI-F16FLUSH-LABEL: name: test_fmad_v4s16 - ; SI-F16FLUSH: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI-F16FLUSH: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-F16FLUSH: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; SI-F16FLUSH: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI-F16FLUSH: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-F16FLUSH: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-F16FLUSH: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 16 - ; SI-F16FLUSH: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-F16FLUSH: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-F16FLUSH: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-F16FLUSH: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI-F16FLUSH: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI-F16FLUSH: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI-F16FLUSH: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI-F16FLUSH: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-F16FLUSH: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI-F16FLUSH: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI-F16FLUSH: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI-F16FLUSH: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-F16FLUSH: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI-F16FLUSH: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI-F16FLUSH: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI-F16FLUSH: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; SI-F16FLUSH: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI-F16FLUSH: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI-F16FLUSH: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI-F16FLUSH: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; SI-F16FLUSH: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; SI-F16FLUSH: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; SI-F16FLUSH: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; SI-F16FLUSH: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI-F16FLUSH: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-F16FLUSH: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; 
SI-F16FLUSH: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] - ; SI-F16FLUSH: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; SI-F16FLUSH: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; SI-F16FLUSH: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) - ; SI-F16FLUSH: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] - ; SI-F16FLUSH: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI-F16FLUSH: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-F16FLUSH: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI-F16FLUSH: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]] - ; SI-F16FLUSH: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; SI-F16FLUSH: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) - ; SI-F16FLUSH: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) - ; SI-F16FLUSH: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] - ; SI-F16FLUSH: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; SI-F16FLUSH: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI-F16FLUSH: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI-F16FLUSH: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT8]], [[FPEXT9]] - ; SI-F16FLUSH: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; SI-F16FLUSH: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC4]](s16) - ; SI-F16FLUSH: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC10]](s16) - ; SI-F16FLUSH: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT10]], [[FPEXT11]] - ; SI-F16FLUSH: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; SI-F16FLUSH: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI-F16FLUSH: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI-F16FLUSH: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT12]], [[FPEXT13]] - ; SI-F16FLUSH: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) - ; SI-F16FLUSH: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) - ; SI-F16FLUSH: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) - 
; SI-F16FLUSH: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT14]], [[FPEXT15]] - ; SI-F16FLUSH: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; SI-F16FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) - ; SI-F16FLUSH: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC5]](s16), [[FPTRUNC7]](s16) - ; SI-F16FLUSH: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; SI-F16FLUSH: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; VI-F16DENORM-LABEL: name: test_fmad_v4s16 - ; VI-F16DENORM: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-F16DENORM: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-F16DENORM: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; VI-F16DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-F16DENORM: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-F16DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-F16DENORM: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-F16DENORM: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-F16DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-F16DENORM: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-F16DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-F16DENORM: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-F16DENORM: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-F16DENORM: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-F16DENORM: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-F16DENORM: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-F16DENORM: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-F16DENORM: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; 
VI-F16DENORM: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-F16DENORM: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-F16DENORM: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI-F16DENORM: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-F16DENORM: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; VI-F16DENORM: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI-F16DENORM: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI-F16DENORM: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-F16DENORM: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; VI-F16DENORM: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; VI-F16DENORM: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; VI-F16DENORM: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; VI-F16DENORM: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; VI-F16DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %16(s16), %17(s16) - ; VI-F16DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %18(s16), %19(s16) - ; VI-F16DENORM: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; VI-F16DENORM: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; VI-F16DENORM: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] - ; VI-F16DENORM: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC11]] - ; VI-F16DENORM: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC6]] - ; VI-F16DENORM: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[FMUL1]], [[TRUNC10]] - ; VI-F16DENORM: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC5]] - ; VI-F16DENORM: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[FMUL2]], [[TRUNC9]] - ; VI-F16DENORM: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC4]] - ; VI-F16DENORM: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[FMUL3]], [[TRUNC8]] - ; 
VI-F16FLUSH-LABEL: name: test_fmad_v4s16 - ; VI-F16FLUSH: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI-F16FLUSH: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-F16FLUSH: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; VI-F16FLUSH: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI-F16FLUSH: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-F16FLUSH: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-F16FLUSH: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-F16FLUSH: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI-F16FLUSH: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI-F16FLUSH: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-F16FLUSH: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-F16FLUSH: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI-F16FLUSH: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-F16FLUSH: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI-F16FLUSH: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI-F16FLUSH: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-F16FLUSH: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI-F16FLUSH: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI-F16FLUSH: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-F16FLUSH: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-F16FLUSH: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI-F16FLUSH: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI-F16FLUSH: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; VI-F16FLUSH: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI-F16FLUSH: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI-F16FLUSH: [[LSHR4:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST4]], [[C]](s32) - ; VI-F16FLUSH: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; VI-F16FLUSH: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; VI-F16FLUSH: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; VI-F16FLUSH: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; VI-F16FLUSH: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; VI-F16FLUSH: [[FMAD:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC]], [[TRUNC4]], [[TRUNC8]] - ; VI-F16FLUSH: [[FMAD1:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] - ; VI-F16FLUSH: [[FMAD2:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] - ; VI-F16FLUSH: [[FMAD3:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] - ; VI-F16FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMAD]](s16), [[FMAD1]](s16) - ; VI-F16FLUSH: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMAD2]](s16), [[FMAD3]](s16) - ; VI-F16FLUSH: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) - ; VI-F16FLUSH: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; GFX10-LABEL: name: test_fmad_v4s16 + ; GFX6-LABEL: name: test_fmad_v4s16_flush + ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST1]], [[C]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX6: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX6: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX6: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX6: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX6: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) + ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; GFX6: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; GFX6: 
[[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]] + ; GFX6: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX6: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) + ; GFX6: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] + ; GFX6: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) + ; GFX6: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT8]], [[FPEXT9]] + ; GFX6: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) + ; GFX6: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC4]](s16) + ; GFX6: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC10]](s16) + ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT10]], [[FPEXT11]] + ; GFX6: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) + ; GFX6: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX6: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT12]], [[FPEXT13]] + ; GFX6: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) + ; GFX6: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) + ; GFX6: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) + ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT14]], [[FPEXT15]] + ; GFX6: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) + ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC5]](s16), [[FPTRUNC7]](s16) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX7-LABEL: name: test_fmad_v4s16_flush + ; GFX7: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX7: 
[[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; GFX7: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX7: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX7: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX7: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX7: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX7: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX7: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX7: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX7: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX7: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX7: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX7: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX7: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX7: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; GFX7: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX7: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX7: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX7: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX7: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX7: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX7: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX7: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) 
+ ; GFX7: [[FMAD:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC]], [[TRUNC4]], [[TRUNC8]] + ; GFX7: [[FMAD1:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] + ; GFX7: [[FMAD2:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] + ; GFX7: [[FMAD3:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] + ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMAD]](s16), [[FMAD1]](s16) + ; GFX7: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMAD2]](s16), [[FMAD3]](s16) + ; GFX7: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX7: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-LABEL: name: test_fmad_v4s16_flush ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX10: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 @@ -519,128 +327,667 @@ $vgpr0_vgpr1 = COPY %3 ... + +--- +name: test_fmad_s16_denorm +machineFunctionInfo: + mode: + fp64-fp16-denormals: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: test_fmad_s16_denorm + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) + ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; 
GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX7-LABEL: name: test_fmad_s16_denorm + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX7: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %6(s16) + ; GFX7: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] + ; GFX7: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] + ; GFX10-LABEL: name: test_fmad_s16_denorm + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] + ; GFX10: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] + ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr1 + %3:_(s16) = G_TRUNC %0 + %4:_(s16) = G_TRUNC %1 + %5:_(s16) = G_TRUNC %2 + %6:_(s16) = G_FMAD %3, %4, %5 + %7:_(s32) = G_ANYEXT %6 + $vgpr0 = COPY %7 +... 
+ --- -name: test_fmad_s64 +name: test_fmad_s16_denorm_flags +machineFunctionInfo: + mode: + fp64-fp16-denormals: true + body: | bb.0: - liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: test_fmad_s16_denorm_flags + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[FPEXT]], [[FPEXT1]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) + ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FPEXT2]], [[FPEXT3]] + ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX7-LABEL: name: test_fmad_s16_denorm_flags + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX7: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %6(s16) + ; GFX7: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC]], [[TRUNC1]] + ; GFX7: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC2]] + ; GFX10-LABEL: name: test_fmad_s16_denorm_flags + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC]], [[TRUNC1]] + ; GFX10: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC2]] + ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr1 + %3:_(s16) = G_TRUNC %0 + %4:_(s16) = G_TRUNC %1 + %5:_(s16) = G_TRUNC %2 + %6:_(s16) = nnan G_FMAD %3, %4, %5 + %7:_(s32) = G_ANYEXT %6 + $vgpr0 = COPY %7 +... + +--- +name: test_fmad_v2s16_denorm +machineFunctionInfo: + mode: + fp64-fp16-denormals: true - ; SI-F16DENORM-LABEL: name: test_fmad_s64 - ; SI-F16DENORM: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-F16DENORM: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-F16DENORM: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; SI-F16DENORM: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]] - ; SI-F16DENORM: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[COPY2]] - ; SI-F16DENORM: $vgpr0_vgpr1 = COPY [[FADD]](s64) - ; SI-F16FLUSH-LABEL: name: test_fmad_s64 - ; SI-F16FLUSH: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-F16FLUSH: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-F16FLUSH: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; SI-F16FLUSH: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]] - ; SI-F16FLUSH: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[COPY2]] - ; SI-F16FLUSH: $vgpr0_vgpr1 = COPY [[FADD]](s64) - ; VI-F16DENORM-LABEL: name: test_fmad_s64 - ; VI-F16DENORM: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-F16DENORM: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-F16DENORM: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; VI-F16DENORM: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]] - ; VI-F16DENORM: [[FADD:%[0-9]+]]:_(s64) = 
G_FADD [[FMUL]], [[COPY2]] - ; VI-F16DENORM: $vgpr0_vgpr1 = COPY [[FADD]](s64) - ; VI-F16FLUSH-LABEL: name: test_fmad_s64 - ; VI-F16FLUSH: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI-F16FLUSH: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-F16FLUSH: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; VI-F16FLUSH: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]] - ; VI-F16FLUSH: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[COPY2]] - ; VI-F16FLUSH: $vgpr0_vgpr1 = COPY [[FADD]](s64) - ; GFX10-LABEL: name: test_fmad_s64 - ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]] - ; GFX10: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[COPY2]] - ; GFX10: $vgpr0_vgpr1 = COPY [[FADD]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = COPY $vgpr4_vgpr5 - %3:_(s64) = G_FMAD %0, %1, %2 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: test_fmad_v2s16_denorm + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX6: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX6: 
[[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) + ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; GFX6: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]] + ; GFX6: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX6: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) + ; GFX6: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] + ; GFX6: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) + ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX7-LABEL: name: test_fmad_v2s16_denorm + ; GFX7: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX7: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX7: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX7: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX7: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST1]], [[C]](s32) + ; GFX7: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX7: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX7: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX7: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %10(s16), %11(s16) + ; GFX7: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC3]] + ; GFX7: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC5]] + ; GFX7: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC2]] + ; GFX7: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[FMUL1]], [[TRUNC4]] + ; GFX10-LABEL: name: test_fmad_v2s16_denorm + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX10: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX10: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX10: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX10: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC2]] + ; GFX10: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC4]] + ; GFX10: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL 
[[TRUNC1]], [[TRUNC3]] + ; GFX10: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[FMUL1]], [[TRUNC5]] + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) + ; GFX10: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = COPY $vgpr2 + %3:_(<2 x s16>) = G_FMAD %0, %1, %2 + $vgpr0 = COPY %3 +... + +--- +name: test_fmad_v2s16_denorm_flags +machineFunctionInfo: + mode: + fp64-fp16-denormals: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: test_fmad_v2s16_denorm_flags + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX6: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[FPEXT]], [[FPEXT1]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) + ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; GFX6: 
[[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FPEXT2]], [[FPEXT3]] + ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; GFX6: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[FPEXT4]], [[FPEXT5]] + ; GFX6: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX6: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) + ; GFX6: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = nnan G_FADD [[FPEXT6]], [[FPEXT7]] + ; GFX6: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) + ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX7-LABEL: name: test_fmad_v2s16_denorm_flags + ; GFX7: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX7: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX7: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX7: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX7: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX7: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX7: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX7: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX7: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %10(s16), %11(s16) + ; GFX7: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; 
GFX7: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC1]], [[TRUNC3]] + ; GFX7: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC5]] + ; GFX7: [[FMUL1:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC]], [[TRUNC2]] + ; GFX7: [[FADD1:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL1]], [[TRUNC4]] + ; GFX10-LABEL: name: test_fmad_v2s16_denorm_flags + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX10: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX10: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX10: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX10: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC]], [[TRUNC2]] + ; GFX10: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC4]] + ; GFX10: [[FMUL1:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC1]], [[TRUNC3]] + ; GFX10: [[FADD1:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL1]], [[TRUNC5]] + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) + ; GFX10: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = COPY $vgpr2 + %3:_(<2 x s16>) = nnan G_FMAD %0, %1, %2 + $vgpr0 = COPY %3 +... 
+ +--- +name: test_fmad_v4s16_denorm +machineFunctionInfo: + mode: + fp64-fp16-denormals: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + + ; GFX6-LABEL: name: test_fmad_v4s16_denorm + ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX6: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX6: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST4]], [[C]](s32) + ; GFX6: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX6: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX6: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) + ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; GFX6: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]] + ; GFX6: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX6: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) + ; GFX6: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] + ; GFX6: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) + ; GFX6: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT8]], [[FPEXT9]] + ; GFX6: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) + ; GFX6: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC4]](s16) + ; GFX6: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC10]](s16) + ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT10]], [[FPEXT11]] + ; GFX6: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) + ; GFX6: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX6: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT 
[[TRUNC7]](s16) + ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT12]], [[FPEXT13]] + ; GFX6: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) + ; GFX6: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) + ; GFX6: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) + ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT14]], [[FPEXT15]] + ; GFX6: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) + ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC5]](s16), [[FPTRUNC7]](s16) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX7-LABEL: name: test_fmad_v4s16_denorm + ; GFX7: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX7: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; GFX7: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX7: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX7: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX7: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX7: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX7: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX7: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX7: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST2]], [[C]](s32) + ; GFX7: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX7: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX7: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX7: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX7: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX7: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; GFX7: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX7: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX7: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX7: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX7: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX7: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX7: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX7: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %16(s16), %17(s16) + ; GFX7: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %18(s16), %19(s16) + ; GFX7: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX7: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] + ; GFX7: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC11]] + ; GFX7: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC6]] + ; GFX7: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[FMUL1]], [[TRUNC10]] + ; GFX7: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC5]] + ; GFX7: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[FMUL2]], [[TRUNC9]] + ; GFX7: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC4]] + ; GFX7: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[FMUL3]], [[TRUNC8]] + ; GFX10-LABEL: name: test_fmad_v4s16_denorm + ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX10: 
[[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX10: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX10: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX10: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX10: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX10: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX10: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX10: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX10: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX10: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX10: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX10: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; GFX10: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX10: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX10: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX10: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX10: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX10: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX10: [[LSHR5:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX10: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; GFX10: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC4]] + ; GFX10: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC8]] + ; GFX10: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC5]] + ; GFX10: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[FMUL1]], [[TRUNC9]] + ; GFX10: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC6]] + ; GFX10: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[FMUL2]], [[TRUNC10]] + ; GFX10: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] + ; GFX10: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[FMUL3]], [[TRUNC11]] + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX10: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 + %3:_(<4 x s16>) = G_FMAD %0, %1, %2 $vgpr0_vgpr1 = COPY %3 ... 
+ --- -name: test_fmad_v2s64 +name: test_fmad_v4s16_denorm_flags +machineFunctionInfo: + mode: + fp64-fp16-denormals: true + body: | bb.0: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 - ; SI-F16DENORM-LABEL: name: test_fmad_v2s64 - ; SI-F16DENORM: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-F16DENORM: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-F16DENORM: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; SI-F16DENORM: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-F16DENORM: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI-F16DENORM: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) - ; SI-F16DENORM: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[UV2]] - ; SI-F16DENORM: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[UV4]] - ; SI-F16DENORM: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[UV3]] - ; SI-F16DENORM: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FMUL1]], [[UV5]] - ; SI-F16DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) - ; SI-F16DENORM: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; SI-F16FLUSH-LABEL: name: test_fmad_v2s64 - ; SI-F16FLUSH: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-F16FLUSH: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-F16FLUSH: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; SI-F16FLUSH: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-F16FLUSH: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI-F16FLUSH: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) - ; SI-F16FLUSH: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], 
[[UV2]] - ; SI-F16FLUSH: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[UV4]] - ; SI-F16FLUSH: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[UV3]] - ; SI-F16FLUSH: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FMUL1]], [[UV5]] - ; SI-F16FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) - ; SI-F16FLUSH: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; VI-F16DENORM-LABEL: name: test_fmad_v2s64 - ; VI-F16DENORM: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-F16DENORM: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-F16DENORM: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; VI-F16DENORM: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-F16DENORM: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI-F16DENORM: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) - ; VI-F16DENORM: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[UV2]] - ; VI-F16DENORM: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[UV4]] - ; VI-F16DENORM: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[UV3]] - ; VI-F16DENORM: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FMUL1]], [[UV5]] - ; VI-F16DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) - ; VI-F16DENORM: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; VI-F16FLUSH-LABEL: name: test_fmad_v2s64 - ; VI-F16FLUSH: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI-F16FLUSH: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-F16FLUSH: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; VI-F16FLUSH: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI-F16FLUSH: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI-F16FLUSH: [[UV4:%[0-9]+]]:_(s64), 
[[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) - ; VI-F16FLUSH: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[UV2]] - ; VI-F16FLUSH: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[UV4]] - ; VI-F16FLUSH: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[UV3]] - ; VI-F16FLUSH: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FMUL1]], [[UV5]] - ; VI-F16FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) - ; VI-F16FLUSH: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; GFX10-LABEL: name: test_fmad_v2s64 - ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX10: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX10: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX10: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) - ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[UV2]] - ; GFX10: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[UV4]] - ; GFX10: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[UV3]] - ; GFX10: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FMUL1]], [[UV5]] - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) - ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - %2:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - %3:_(<2 x s64>) = G_FMAD %0, %1, %2 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 + ; GFX6-LABEL: name: test_fmad_v4s16_denorm_flags + ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), 
[[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX6: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX6: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX6: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX6: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX6: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6: 
[[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[FPEXT]], [[FPEXT1]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) + ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FPEXT2]], [[FPEXT3]] + ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; GFX6: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[FPEXT4]], [[FPEXT5]] + ; GFX6: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX6: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) + ; GFX6: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = nnan G_FADD [[FPEXT6]], [[FPEXT7]] + ; GFX6: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) + ; GFX6: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = nnan G_FMUL [[FPEXT8]], [[FPEXT9]] + ; GFX6: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) + ; GFX6: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC4]](s16) + ; GFX6: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC10]](s16) + ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = nnan G_FADD [[FPEXT10]], [[FPEXT11]] + ; GFX6: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) + ; GFX6: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX6: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = nnan G_FMUL [[FPEXT12]], [[FPEXT13]] + ; GFX6: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) + ; GFX6: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) + ; GFX6: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) + ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = nnan G_FADD [[FPEXT14]], [[FPEXT15]] + ; GFX6: 
[[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) + ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC5]](s16), [[FPTRUNC7]](s16) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX7-LABEL: name: test_fmad_v4s16_denorm_flags + ; GFX7: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX7: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; GFX7: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX7: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX7: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX7: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX7: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX7: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX7: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX7: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX7: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX7: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX7: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX7: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX7: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX7: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX7: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX7: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX7: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX7: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX7: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; 
GFX7: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; GFX7: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX7: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX7: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX7: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX7: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX7: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX7: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX7: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %16(s16), %17(s16) + ; GFX7: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %18(s16), %19(s16) + ; GFX7: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX7: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX7: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC3]], [[TRUNC7]] + ; GFX7: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC11]] + ; GFX7: [[FMUL1:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC2]], [[TRUNC6]] + ; GFX7: [[FADD1:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL1]], [[TRUNC10]] + ; GFX7: [[FMUL2:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC1]], [[TRUNC5]] + ; GFX7: [[FADD2:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL2]], [[TRUNC9]] + ; GFX7: [[FMUL3:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC]], [[TRUNC4]] + ; GFX7: [[FADD3:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL3]], [[TRUNC8]] + ; GFX10-LABEL: name: test_fmad_v4s16_denorm_flags + ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX10: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX10: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX10: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX10: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX10: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX10: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX10: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX10: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX10: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX10: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX10: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX10: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; GFX10: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX10: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX10: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX10: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX10: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX10: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX10: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX10: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; GFX10: [[FMUL:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC]], [[TRUNC4]] + ; GFX10: [[FADD:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL]], [[TRUNC8]] + ; GFX10: [[FMUL1:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC1]], 
[[TRUNC5]] + ; GFX10: [[FADD1:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL1]], [[TRUNC9]] + ; GFX10: [[FMUL2:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC2]], [[TRUNC6]] + ; GFX10: [[FADD2:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL2]], [[TRUNC10]] + ; GFX10: [[FMUL3:%[0-9]+]]:_(s16) = nnan G_FMUL [[TRUNC3]], [[TRUNC7]] + ; GFX10: [[FADD3:%[0-9]+]]:_(s16) = nnan G_FADD [[FMUL3]], [[TRUNC11]] + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX10: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 + %3:_(<4 x s16>) = nnan G_FMAD %0, %1, %2 + $vgpr0_vgpr1 = COPY %3 ... + Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir @@ -0,0 +1,495 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX7 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX10 %s + +--- +name: test_fmad_s32_flush +machineFunctionInfo: + mode: + fp32-denormals: false + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: test_fmad_s32_flush + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX6: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], 
[[COPY2]] + ; GFX6: $vgpr0 = COPY [[FMAD]](s32) + ; GFX7-LABEL: name: test_fmad_s32_flush + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX7: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]] + ; GFX7: $vgpr0 = COPY [[FMAD]](s32) + ; GFX10-LABEL: name: test_fmad_s32_flush + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10: $vgpr0 = COPY [[FMAD]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = G_FMAD %0, %1, %2 + $vgpr0 = COPY %3 +... + +--- +name: test_fmad_s32_flags_flush +machineFunctionInfo: + mode: + fp32-denormals: false + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: test_fmad_s32_flags_flush + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX6: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]] + ; GFX6: $vgpr0 = COPY [[FMAD]](s32) + ; GFX7-LABEL: name: test_fmad_s32_flags_flush + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX7: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]] + ; GFX7: $vgpr0 = COPY [[FMAD]](s32) + ; GFX10-LABEL: name: test_fmad_s32_flags_flush + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10: $vgpr0 = COPY [[FMAD]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = nnan G_FMAD %0, %1, %2 + $vgpr0 
= COPY %3 +... + +--- +name: test_fmad_v2s32_flush +machineFunctionInfo: + mode: + fp32-denormals: false + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + + ; GFX6-LABEL: name: test_fmad_v2s32_flush + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX6: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]] + ; GFX6: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX7-LABEL: name: test_fmad_v2s32_flush + ; GFX7: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX7: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX7: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX7: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX7: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]] + ; GFX7: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]] + ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32) + ; GFX7: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-LABEL: name: test_fmad_v2s32_flush + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s32>) 
= COPY $vgpr4_vgpr5 + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]] + ; GFX10: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]] + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32) + ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 + %3:_(<2 x s32>) = G_FMAD %0, %1, %2 + $vgpr0_vgpr1 = COPY %3 +... + +--- +name: test_fmad_v3s32_flush +machineFunctionInfo: + mode: + fp32-denormals: false + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 + + ; GFX6-LABEL: name: test_fmad_v3s32_flush + ; GFX6: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX6: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX6: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX6: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]] + ; GFX6: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]] + ; GFX6: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX7-LABEL: name: 
test_fmad_v3s32_flush + ; GFX7: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX7: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX7: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX7: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX7: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX7: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]] + ; GFX7: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]] + ; GFX7: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]] + ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-LABEL: name: test_fmad_v3s32_flush + ; GFX10: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX10: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX10: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX10: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX10: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]] + ; GFX10: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]] + ; GFX10: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]] + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32) + ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + %0:_(<3 x s32>) = COPY 
$vgpr0_vgpr1_vgpr2 + %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + %3:_(<3 x s32>) = G_FMAD %0, %1, %2 + $vgpr0_vgpr1_vgpr2 = COPY %3 +... + +--- +name: test_fmad_v4s32_flush +machineFunctionInfo: + mode: + fp32-denormals: false + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 + + ; GFX6-LABEL: name: test_fmad_v4s32_flush + ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) + ; GFX6: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV4]], [[UV8]] + ; GFX6: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]] + ; GFX6: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]] + ; GFX6: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX7-LABEL: name: test_fmad_v4s32_flush + ; GFX7: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX7: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX7: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX7: [[UV4:%[0-9]+]]:_(s32), 
[[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; GFX7: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) + ; GFX7: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV4]], [[UV8]] + ; GFX7: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]] + ; GFX7: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]] + ; GFX7: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]] + ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-LABEL: name: test_fmad_v4s32_flush + ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; GFX10: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) + ; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV4]], [[UV8]] + ; GFX10: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]] + ; GFX10: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]] + ; GFX10: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]] + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x s32>) = COPY 
$vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(<4 x s32>) = G_FMAD %0, %1, %2 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: test_fmad_s32_denorm +machineFunctionInfo: + mode: + fp32-denormals: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: test_fmad_s32_denorm + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX6: $vgpr0 = COPY %3(s32) + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] + ; GFX7-LABEL: name: test_fmad_s32_denorm + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX7: $vgpr0 = COPY %3(s32) + ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] + ; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] + ; GFX10-LABEL: name: test_fmad_s32_denorm + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: $vgpr0 = COPY %3(s32) + ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] + ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = G_FMAD %0, %1, %2 + $vgpr0 = COPY %3 +... 
+ +--- +name: test_fmad_s32_flags_denorm +machineFunctionInfo: + mode: + fp32-denormals: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: test_fmad_s32_flags_denorm + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX6: $vgpr0 = COPY %3(s32) + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]] + ; GFX7-LABEL: name: test_fmad_s32_flags_denorm + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX7: $vgpr0 = COPY %3(s32) + ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]] + ; GFX7: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]] + ; GFX10-LABEL: name: test_fmad_s32_flags_denorm + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: $vgpr0 = COPY %3(s32) + ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]] + ; GFX10: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = nnan G_FMAD %0, %1, %2 + $vgpr0 = COPY %3 +... 
+ +--- +name: test_fmad_v2s32_denorm +machineFunctionInfo: + mode: + fp32-denormals: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + + ; GFX6-LABEL: name: test_fmad_v2s32_denorm + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV5]] + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV4]] + ; GFX7-LABEL: name: test_fmad_v2s32_denorm + ; GFX7: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX7: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX7: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX7: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32) + ; GFX7: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] + ; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV5]] + ; GFX7: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] + ; GFX7: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV4]] + ; GFX10-LABEL: name: 
test_fmad_v2s32_denorm + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32) + ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]] + ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV5]] + ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]] + ; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV4]] + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 + %3:_(<2 x s32>) = G_FMAD %0, %1, %2 + $vgpr0_vgpr1 = COPY %3 +... 
+ +--- +name: test_fmad_v3s32_denorm +machineFunctionInfo: + mode: + fp32-denormals: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8 + + ; GFX6-LABEL: name: test_fmad_v3s32_denorm + ; GFX6: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX6: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX6: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR %13(s32), %14(s32), %15(s32) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]] + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] + ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV6]] + ; GFX7-LABEL: name: test_fmad_v3s32_denorm + ; GFX7: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX7: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX7: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX7: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX7: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR %13(s32), 
%14(s32), %15(s32) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] + ; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] + ; GFX7: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] + ; GFX7: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]] + ; GFX7: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] + ; GFX7: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV6]] + ; GFX10-LABEL: name: test_fmad_v3s32_denorm + ; GFX10: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX10: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX10: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX10: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX10: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR %13(s32), %14(s32), %15(s32) + ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]] + ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]] + ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]] + ; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]] + ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]] + ; GFX10: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV6]] + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + %3:_(<3 x s32>) = G_FMAD %0, %1, %2 + $vgpr0_vgpr1_vgpr2 = COPY %3 +... 
+ +--- +name: test_fmad_v4s32_denorm +machineFunctionInfo: + mode: + fp32-denormals: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 + + ; GFX6-LABEL: name: test_fmad_v4s32_denorm + ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %16(s32), %17(s32), %18(s32), %19(s32) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV11]] + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV10]] + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] + ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV9]] + ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] + ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV8]] + ; GFX7-LABEL: name: test_fmad_v4s32_denorm + ; GFX7: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX7: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX7: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX7: 
[[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; GFX7: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) + ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %16(s32), %17(s32), %18(s32), %19(s32) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] + ; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV11]] + ; GFX7: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]] + ; GFX7: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV10]] + ; GFX7: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] + ; GFX7: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV9]] + ; GFX7: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] + ; GFX7: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV8]] + ; GFX10-LABEL: name: test_fmad_v4s32_denorm + ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; GFX10: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %16(s32), %17(s32), %18(s32), %19(s32) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]] + ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV11]] + ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = 
G_FMUL [[UV2]], [[UV6]] + ; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV10]] + ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]] + ; GFX10: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV9]] + ; GFX10: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]] + ; GFX10: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV8]] + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(<4 x s32>) = G_FMAD %0, %1, %2 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s64.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s64.mir @@ -0,0 +1,112 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: test_fmad_s64_flush +machineFunctionInfo: + mode: + fp64-fp16-denormals: false + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_fmad_s64_flush + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; CHECK: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]] + ; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[COPY2]] + ; CHECK: $vgpr0_vgpr1 = COPY [[FADD]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = COPY $vgpr4_vgpr5 + %3:_(s64) = G_FMAD %0, %1, %2 + $vgpr0_vgpr1 = COPY %3 +... 
+ +--- +name: test_fmad_v2s64_flush +machineFunctionInfo: + mode: + fp64-fp16-denormals: false + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 + + ; CHECK-LABEL: name: test_fmad_v2s64_flush + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) + ; CHECK: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[UV2]] + ; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[UV4]] + ; CHECK: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[UV3]] + ; CHECK: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FMUL1]], [[UV5]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(<2 x s64>) = G_FMAD %0, %1, %2 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... 
+ +--- +name: test_fmad_s64_denorm +machineFunctionInfo: + mode: + fp64-fp16-denormals: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_fmad_s64_denorm + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; CHECK: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY1]] + ; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[COPY2]] + ; CHECK: $vgpr0_vgpr1 = COPY [[FADD]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = COPY $vgpr4_vgpr5 + %3:_(s64) = G_FMAD %0, %1, %2 + $vgpr0_vgpr1 = COPY %3 +... + +--- +name: test_fmad_v2s64_denorm +machineFunctionInfo: + mode: + fp64-fp16-denormals: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 + + ; CHECK-LABEL: name: test_fmad_v2s64_denorm + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) + ; CHECK: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[UV2]] + ; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[UV4]] + ; CHECK: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[UV3]] + ; CHECK: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FMUL1]], [[UV5]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 
x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(<2 x s64>) = G_FMAD %0, %1, %2 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... Index: llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir +++ llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir @@ -12,6 +12,7 @@ machineFunctionInfo: mode: ieee: false + fp32-denormals: false body: | bb.0: @@ -33,6 +34,7 @@ machineFunctionInfo: mode: ieee: false + fp32-denormals: false body: | bb.0: @@ -54,6 +56,7 @@ machineFunctionInfo: mode: ieee: false + fp32-denormals: false body: | bb.0: