Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -107,6 +107,10 @@ const TargetLowering &getTargetLowering() const; + /// \return true if the combine is running prior to legalization and if \p + /// Query is legal on the target. + bool isLegal(const LegalityQuery &Query) const; + /// \return true if the combine is running prior to legalization, or if \p /// Query is legal on the target. bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const; @@ -337,6 +341,15 @@ bool matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src); bool applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src); + /// Transform (fadd (fmul x, y), z) -> (fma x, y, z) + /// (fadd (fmul x, y), z) -> (fmad x, y, z) + bool matchCombineFAddFMulToFMadOrFMA( + MachineInstr &MI, + std::tuple &MatchInfo); + bool applyCombineFAddFMulToFMadOrFMA( + MachineInstr &MI, + std::tuple &MatchInfo); + /// Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x). bool matchCombineTruncOfExt(MachineInstr &MI, std::pair &MatchInfo); @@ -563,6 +576,10 @@ SmallDenseMap &MemOffset2Idx, const SmallVector &RegsToVisit, const unsigned MemSizeInBits); + + /// Checks if \p MI is TargetOpcode::G_FMUL and contractable either + /// due to global flags or MachineInstr flags. + bool isContractableFMul(const MachineInstr &MI, bool AllowFusionGlobally); }; } // namespace llvm Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -784,9 +784,12 @@ /// Return true if target always beneficiates from combining into FMA for a /// given value type. This must typically return false on targets where FMA /// takes more cycles to execute than FADD. - virtual bool enableAggressiveFMAFusion(EVT VT) const { - return false; - } + virtual bool enableAggressiveFMAFusion(EVT VT) const { return false; } + + /// Return true if target always beneficiates from combining into FMA for a + /// given value type. This must typically return false on targets where FMA + /// takes more cycles to execute than FADD. + virtual bool enableAggressiveFMAFusion(LLT Ty) const { return false; } /// Return the ValueType of the result of SETCC operations. virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, @@ -2663,6 +2666,14 @@ return isFPExtFree(DestVT, SrcVT); } + /// Return true if an fpext operation input to an \p Opcode operation is free + /// (for instance, because half-precision floating-point numbers are + /// implicitly extended to float-precision) for an FMA instruction. + virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, + LLT DestTy, LLT SrcTy) const { + return false; + } + /// Return true if folding a vector load into ExtVal (a sign, zero, or any /// extend node) is profitable. virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; } @@ -2694,11 +2705,46 @@ return false; } + /// Return true if an FMA operation is faster than a pair of fmul and fadd + /// instructions. fmuladd intrinsics will be expanded to FMAs when this method + /// returns true, otherwise fmuladd is expanded to fmul + fadd. + /// + /// NOTE: This may be called before legalization on types for which FMAs are + /// not legal, but should return true if those types will eventually legalize + /// to types that support FMAs. After legalization, it will only be called on + /// types that support FMAs (via Legal or Custom actions) + virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + const LLT) const { + return false; + } + /// IR version virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const { return false; } + /// Returns true if be combined with to form a TargetOpcode::G_FMAD. \p N + /// may be an TargetOpcode::G_FADD, TargetOpcode::G_FSUB, or an + /// TargetOpcode::G_FMUL which will be distributed into an fadd/fsub. + virtual bool isFMADLegal(const MachineInstr &MI, const LLT Ty) const { + assert((MI.getOpcode() == TargetOpcode::G_FADD || + MI.getOpcode() == TargetOpcode::G_FSUB || + MI.getOpcode() == TargetOpcode::G_FMUL) && + "unexpected node in FMAD forming combine"); + switch (Ty.getScalarSizeInBits()) { + case 16: + return isOperationLegal(TargetOpcode::G_FMAD, MVT::f16); + case 32: + return isOperationLegal(TargetOpcode::G_FMAD, MVT::f32); + case 64: + return isOperationLegal(TargetOpcode::G_FMAD, MVT::f64); + default: + break; + } + + return false; + } + /// Returns true if be combined with to form an ISD::FMAD. \p N may be an /// ISD::FADD, ISD::FSUB, or an ISD::FMUL which will be distributed into an /// fadd/fsub. Index: llvm/include/llvm/Target/GlobalISel/Combine.td =================================================================== --- llvm/include/llvm/Target/GlobalISel/Combine.td +++ llvm/include/llvm/Target/GlobalISel/Combine.td @@ -575,6 +575,20 @@ [{ return Helper.matchExtendThroughPhis(*${root}, ${matchinfo}); }]), (apply [{ return Helper.applyExtendThroughPhis(*${root}, ${matchinfo}); }])>; +// Transform (fadd x, (fmul y, z)) -> (fma y, z, x) +// (fadd x, (fmul y, z)) -> (fmad y, z, x) +// Transform (fadd (fmul x, y), z) -> (fma x, y, z) +// (fadd (fmul x, y), z) -> (fmad x, y, z) +def combine_fadd_fmul_to_fmad_or_fma_info : + GIDefMatchData<"std::tuple">; +def combine_fadd_fmul_to_fmad_or_fma: GICombineRule< + (defs root:$root, combine_fadd_fmul_to_fmad_or_fma_info:$info), + (match (wip_match_opcode G_FADD):$root, + [{ return Helper.matchCombineFAddFMulToFMadOrFMA(*${root}, + ${info}); }]), + (apply [{ return Helper.applyCombineFAddFMulToFMadOrFMA(*${root}, + ${info}); }])>; + // Currently only the one combine above. def insert_vec_elt_combines : GICombineGroup< [combine_insert_vec_elts_build_vector]>; @@ -649,4 +663,5 @@ unmerge_zext_to_zext, trunc_ext_fold, trunc_shl, const_combines, xor_of_and_with_same_reg, ptr_add_with_zero, shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine, - div_rem_to_divrem, funnel_shift_combines]>; + div_rem_to_divrem, funnel_shift_combines, + combine_fadd_fmul_to_fmad_or_fma]>; Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -118,6 +118,10 @@ return !LI || LI->getAction(Query).Action == LegalizeActions::Legal; } +bool CombinerHelper::isLegal(const LegalityQuery &Query) const { + return LI && LI->getAction(Query).Action == LegalizeActions::Legal; +} + void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const { Observer.changingAllUsesOfReg(MRI, FromReg); @@ -3894,6 +3898,104 @@ Observer.changedInstr(MI); } +static bool isContractable(MachineInstr &MI) { + return MI.getFlag(MachineInstr::MIFlag::FmContract) || + MI.getFlag(MachineInstr::MIFlag::FmReassoc); +} + +bool CombinerHelper::isContractableFMul(const MachineInstr &MI, + bool AllowFusionGlobally) { + if (MI.getOpcode() != TargetOpcode::G_FMUL) + return false; + return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmReassoc) || + MI.getFlag(MachineInstr::MIFlag::FmContract); +} + +static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, + const MachineRegisterInfo &MRI) { + return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()), + MRI.use_instr_nodbg_end()) > + std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()), + MRI.use_instr_nodbg_end()); +} + +bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA( + MachineInstr &MI, + std::tuple &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FADD); + + auto *MF = MI.getParent()->getParent(); + const auto &TLI = *MF->getSubtarget().getTargetLowering(); + const TargetOptions &Options = MF->getTarget().Options; + LLT DstType = MRI.getType(MI.getOperand(0).getReg()); + MachineInstr *MI0 = MRI.getVRegDef(MI.getOperand(1).getReg()); + MachineInstr *MI1 = MRI.getVRegDef(MI.getOperand(2).getReg()); + + bool LegalOperations = LI; + // Floating-point multiply-add with intermediate rounding. + bool HasFMAD = (LegalOperations && TLI.isFMADLegal(MI, DstType)); + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) && + (!LegalOperations || isLegal({TargetOpcode::G_FMA, {DstType}})); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return false; + + bool CanFuse = Options.UnsafeFPMath || isContractable(MI); + bool AllowFusionGlobally = + (Options.AllowFPOpFusion == FPOpFusion::Fast || CanFuse || HasFMAD); + + // If the addition is not contractable, do not combine. + if (!AllowFusionGlobally && !isContractable(MI) && + !MI.getFlag(MachineInstr::MIFlag::FmReassoc)) + return false; + + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(DstType); + + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + if (Aggressive && isContractableFMul(*MI0, AllowFusionGlobally) && + isContractableFMul(*MI1, AllowFusionGlobally)) { + if (hasMoreUses(*MI0, *MI1, MRI)) + std::swap(MI0, MI1); + } + + // fold (fadd (fmul x, y), z) -> (fma x, y, z) + if (isContractableFMul(*MI0, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(MI0->getOperand(0).getReg()))) { + MatchInfo = {MI0->getOperand(1).getReg(), MI0->getOperand(2).getReg(), + MI1->getOperand(0).getReg(), PreferredFusedOpcode}; + return true; + } + + // fold (fadd x, (fmul y, z)) -> (fma y, z, x) + if (isContractableFMul(*MI1, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))) { + MatchInfo = {MI1->getOperand(1).getReg(), MI1->getOperand(2).getReg(), + MI0->getOperand(0).getReg(), PreferredFusedOpcode}; + return true; + } + + return false; +} + +bool CombinerHelper::applyCombineFAddFMulToFMadOrFMA( + MachineInstr &MI, + std::tuple &MatchInfo) { + Register Src1, Src2, Src3; + unsigned PreferredFusedOpcode; + std::tie(Src1, Src2, Src3, PreferredFusedOpcode) = MatchInfo; + + Builder.setInstrAndDebugLoc(MI); + Builder.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {Src1, Src2, Src3}); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3325,9 +3325,12 @@ InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const { // FIXME: Handle op_sel + Register Src; + unsigned Mods; + std::tie(Src, Mods) = selectVOP3ModsImpl(Root); return {{ - [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods + [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods }}; } Index: llvm/lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.h +++ llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -252,6 +252,8 @@ bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, EVT DestVT, EVT SrcVT) const override; + bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, + LLT SrcTy) const override; bool isShuffleMaskLegal(ArrayRef /*Mask*/, EVT /*VT*/) const override; @@ -377,6 +379,7 @@ MachineBasicBlock *BB) const override; bool hasBitPreservingFPLogic(EVT VT) const override; + bool enableAggressiveFMAFusion(LLT Ty) const override; bool enableAggressiveFMAFusion(EVT VT) const override; EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; @@ -385,6 +388,9 @@ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override; + bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + const LLT Ty) const override; + bool isFMADLegal(const MachineInstr &MI, const LLT Ty) const override; bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override; SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const; Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -873,6 +873,16 @@ !hasFP32Denormals(DAG.getMachineFunction()); } +bool SITargetLowering::isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, + LLT DestTy, LLT SrcTy) const { + return ((Opcode == TargetOpcode::G_FMAD && Subtarget->hasMadMixInsts()) || + (Opcode == TargetOpcode::G_FMA && Subtarget->hasFmaMixInsts())) && + DestTy.getScalarSizeInBits() == 32 && + SrcTy.getScalarSizeInBits() == 16 && + // TODO: This probably only requires no input flushing? + !hasFP32Denormals(*MI.getMF()); +} + bool SITargetLowering::isShuffleMaskLegal(ArrayRef, EVT) const { // SI has some legal vector types, but no legal vector operations. Say no // shuffles are legal in order to prefer scalarizing some vector operations. @@ -4313,6 +4323,8 @@ return true; } +bool SITargetLowering::enableAggressiveFMAFusion(LLT Ty) const { return true; } + EVT SITargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, EVT VT) const { if (!VT.isVector()) { @@ -4378,6 +4390,34 @@ return false; } +bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + const LLT Ty) const { + switch (Ty.getScalarSizeInBits()) { + case 16: + return isFMAFasterThanFMulAndFAdd(MF, MVT::f16); + case 32: + return isFMAFasterThanFMulAndFAdd(MF, MVT::f32); + case 64: + return isFMAFasterThanFMulAndFAdd(MF, MVT::f64); + default: + break; + } + + return false; +} + +bool SITargetLowering::isFMADLegal(const MachineInstr &MI, const LLT Ty) const { + if (!Ty.isScalar()) + return false; + + if (Ty.getScalarSizeInBits() == 16) + return Subtarget->hasMadF16() && !hasFP64FP16Denormals(*MI.getMF()); + if (Ty.getScalarSizeInBits() == 32) + return Subtarget->hasMadMacF32Insts() && !hasFP32Denormals(*MI.getMF()); + + return false; +} + bool SITargetLowering::isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const { // TODO: Check future ftz flag Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir @@ -0,0 +1,2480 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -fp-contract=fast %s -o - | FileCheck -check-prefix=GFX9-CONTRACT %s +# RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner --denormal-fp-math=preserve-sign %s -o - | FileCheck -check-prefix=GFX9-DENORM %s +# RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s +# RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -fp-contract=fast %s -o - | FileCheck -check-prefix=GFX10-CONTRACT %s +# RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner --denormal-fp-math=preserve-sign %s -o - | FileCheck -check-prefix=GFX10-DENORM %s + +--- +name: test_f32_add_mul +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: ccr_sgpr_64 } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_f32_add_mul + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9: $vgpr0 = COPY [[FMA]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-CONTRACT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM-LABEL: name: test_f32_add_mul + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-DENORM: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-LABEL: name: test_f32_add_mul + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10: $vgpr0 = COPY [[FMA]](s32) + ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-CONTRACT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM-LABEL: name: test_f32_add_mul + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-DENORM: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %4:_(s32) = reassoc G_FMUL %0, %1 + %5:_(s32) = reassoc G_FADD %4, %2 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %6, implicit $vgpr0 + +... +--- +name: test_f32_add_mul_rhs +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: ccr_sgpr_64 } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_f32_add_mul_rhs + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9: $vgpr0 = COPY [[FMA]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul_rhs + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-CONTRACT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-DENORM: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-LABEL: name: test_f32_add_mul_rhs + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10: $vgpr0 = COPY [[FMA]](s32) + ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul_rhs + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-CONTRACT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-DENORM: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %4:_(s32) = reassoc G_FMUL %0, %1 + %5:_(s32) = reassoc G_FADD %2, %4 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %6, implicit $vgpr0 + +... +--- +name: test_half_add_mul +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: ccr_sgpr_64 } + - { id: 10, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_half_add_mul + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT-LABEL: name: test_half_add_mul + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM-LABEL: name: test_half_add_mul + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-LABEL: name: test_half_add_mul + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT-LABEL: name: test_half_add_mul + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM-LABEL: name: test_half_add_mul + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + %4:_(s32) = COPY $vgpr0 + %0:_(s16) = G_TRUNC %4(s32) + %5:_(s32) = COPY $vgpr1 + %1:_(s16) = G_TRUNC %5(s32) + %6:_(s32) = COPY $vgpr2 + %2:_(s16) = G_TRUNC %6(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %7:_(s16) = reassoc G_FMUL %0, %1 + %8:_(s16) = reassoc G_FADD %7, %2 + %10:_(s32) = G_ANYEXT %8(s16) + $vgpr0 = COPY %10(s32) + %9:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %9, implicit $vgpr0 + +... +--- +name: test_half_add_mul_rhs +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: ccr_sgpr_64 } + - { id: 10, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_half_add_mul_rhs + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT-LABEL: name: test_half_add_mul_rhs + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-LABEL: name: test_half_add_mul_rhs + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT-LABEL: name: test_half_add_mul_rhs + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + %4:_(s32) = COPY $vgpr0 + %0:_(s16) = G_TRUNC %4(s32) + %5:_(s32) = COPY $vgpr1 + %1:_(s16) = G_TRUNC %5(s32) + %6:_(s32) = COPY $vgpr2 + %2:_(s16) = G_TRUNC %6(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %7:_(s16) = reassoc G_FMUL %0, %1 + %8:_(s16) = reassoc G_FADD %2, %7 + %10:_(s32) = G_ANYEXT %8(s16) + $vgpr0 = COPY %10(s32) + %9:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %9, implicit $vgpr0 + +... +--- +name: test_double_add_mul +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: _ } + - { id: 10, class: _ } + - { id: 11, class: _ } + - { id: 12, class: ccr_sgpr_64 } + - { id: 13, class: _ } + - { id: 14, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_double_add_mul + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9: $vgpr0 = COPY [[UV]](s32) + ; GFX9: $vgpr1 = COPY [[UV1]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT-LABEL: name: test_double_add_mul + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-CONTRACT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM-LABEL: name: test_double_add_mul + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-LABEL: name: test_double_add_mul + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10: $vgpr0 = COPY [[UV]](s32) + ; GFX10: $vgpr1 = COPY [[UV1]](s32) + ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT-LABEL: name: test_double_add_mul + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-CONTRACT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM-LABEL: name: test_double_add_mul + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %10:_(s64) = reassoc G_FMUL %0, %1 + %11:_(s64) = reassoc G_FADD %10, %2 + %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) + $vgpr0 = COPY %13(s32) + $vgpr1 = COPY %14(s32) + %12:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 + +... +--- +name: test_double_add_mul_rhs +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: _ } + - { id: 10, class: _ } + - { id: 11, class: _ } + - { id: 12, class: ccr_sgpr_64 } + - { id: 13, class: _ } + - { id: 14, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_double_add_mul_rhs + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9: $vgpr0 = COPY [[UV]](s32) + ; GFX9: $vgpr1 = COPY [[UV1]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT-LABEL: name: test_double_add_mul_rhs + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-CONTRACT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-LABEL: name: test_double_add_mul_rhs + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10: $vgpr0 = COPY [[UV]](s32) + ; GFX10: $vgpr1 = COPY [[UV1]](s32) + ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT-LABEL: name: test_double_add_mul_rhs + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-CONTRACT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %10:_(s64) = reassoc G_FMUL %0, %1 + %11:_(s64) = reassoc G_FADD %2, %10 + %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) + $vgpr0 = COPY %13(s32) + $vgpr1 = COPY %14(s32) + %12:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 + +... +--- +name: test_4xfloat_add_mul +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: _ } + - { id: 10, class: _ } + - { id: 11, class: _ } + - { id: 12, class: _ } + - { id: 13, class: _ } + - { id: 14, class: _ } + - { id: 15, class: _ } + - { id: 16, class: _ } + - { id: 17, class: _ } + - { id: 18, class: ccr_sgpr_64 } + - { id: 19, class: _ } + - { id: 20, class: _ } + - { id: 21, class: _ } + - { id: 22, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_4xfloat_add_mul + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) + ; GFX9: $vgpr0 = COPY [[UV]](s32) + ; GFX9: $vgpr1 = COPY [[UV1]](s32) + ; GFX9: $vgpr2 = COPY [[UV2]](s32) + ; GFX9: $vgpr3 = COPY [[UV3]](s32) + ; GFX9: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX9: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-CONTRACT-LABEL: name: test_4xfloat_add_mul + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-CONTRACT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-CONTRACT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-CONTRACT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-CONTRACT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) + ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-CONTRACT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-CONTRACT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) + ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-LABEL: name: test_4xfloat_add_mul + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) + ; GFX10: $vgpr0 = COPY [[UV]](s32) + ; GFX10: $vgpr1 = COPY [[UV1]](s32) + ; GFX10: $vgpr2 = COPY [[UV2]](s32) + ; GFX10: $vgpr3 = COPY [[UV3]](s32) + ; GFX10: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX10: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-CONTRACT-LABEL: name: test_4xfloat_add_mul + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-CONTRACT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-CONTRACT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-CONTRACT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-CONTRACT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) + ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-CONTRACT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-CONTRACT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) + ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %0:_(<4 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32), %7(s32) + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %1:_(<4 x s32>) = G_BUILD_VECTOR %8(s32), %9(s32), %10(s32), %11(s32) + %12:_(s32) = COPY $vgpr8 + %13:_(s32) = COPY $vgpr9 + %14:_(s32) = COPY $vgpr10 + %15:_(s32) = COPY $vgpr11 + %2:_(<4 x s32>) = G_BUILD_VECTOR %12(s32), %13(s32), %14(s32), %15(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %16:_(<4 x s32>) = reassoc G_FMUL %0, %1 + %17:_(<4 x s32>) = reassoc G_FADD %16, %2 + %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %17(<4 x s32>) + $vgpr0 = COPY %19(s32) + $vgpr1 = COPY %20(s32) + $vgpr2 = COPY %21(s32) + $vgpr3 = COPY %22(s32) + %18:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %18, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + +... +--- +name: test_3xfloat_add_mul_rhs +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: _ } + - { id: 10, class: _ } + - { id: 11, class: _ } + - { id: 12, class: _ } + - { id: 13, class: _ } + - { id: 14, class: _ } + - { id: 15, class: ccr_sgpr_64 } + - { id: 16, class: _ } + - { id: 17, class: _ } + - { id: 18, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_3xfloat_add_mul_rhs + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX9: $vgpr0 = COPY [[UV]](s32) + ; GFX9: $vgpr1 = COPY [[UV1]](s32) + ; GFX9: $vgpr2 = COPY [[UV2]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX9: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-CONTRACT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-CONTRACT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-DENORM: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-LABEL: name: test_3xfloat_add_mul_rhs + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX10: $vgpr0 = COPY [[UV]](s32) + ; GFX10: $vgpr1 = COPY [[UV1]](s32) + ; GFX10: $vgpr2 = COPY [[UV2]](s32) + ; GFX10: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX10: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-CONTRACT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10-CONTRACT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10-DENORM: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %0:_(<3 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32) + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %1:_(<3 x s32>) = G_BUILD_VECTOR %7(s32), %8(s32), %9(s32) + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %12:_(s32) = COPY $vgpr8 + %2:_(<3 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32), %12(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %13:_(<3 x s32>) = reassoc G_FMUL %0, %1 + %14:_(<3 x s32>) = reassoc G_FADD %2, %13 + %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(<3 x s32>) + $vgpr0 = COPY %16(s32) + $vgpr1 = COPY %17(s32) + $vgpr2 = COPY %18(s32) + %15:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %15, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + +... +--- +name: test_4xhalf_add_mul +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: _ } + - { id: 10, class: _ } + - { id: 11, class: _ } + - { id: 12, class: ccr_sgpr_64 } + - { id: 13, class: _ } + - { id: 14, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_4xhalf_add_mul + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX9: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; GFX9: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]] + ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) + ; GFX9: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX9: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT-LABEL: name: test_4xhalf_add_mul + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-CONTRACT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-CONTRACT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]] + ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) + ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-DENORM: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-DENORM: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]] + ; GFX9-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) + ; GFX9-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-LABEL: name: test_4xhalf_add_mul + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX10: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; GFX10: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]] + ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) + ; GFX10: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX10: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT-LABEL: name: test_4xhalf_add_mul + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-CONTRACT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-CONTRACT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]] + ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) + ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-DENORM: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-DENORM: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]] + ; GFX10-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) + ; GFX10-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + %4:_(<2 x s16>) = COPY $vgpr0 + %5:_(<2 x s16>) = COPY $vgpr1 + %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) + %6:_(<2 x s16>) = COPY $vgpr2 + %7:_(<2 x s16>) = COPY $vgpr3 + %1:_(<4 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>) + %8:_(<2 x s16>) = COPY $vgpr4 + %9:_(<2 x s16>) = COPY $vgpr5 + %2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %10:_(<4 x s16>) = reassoc G_FMUL %0, %1 + %11:_(<4 x s16>) = reassoc G_FADD %10, %2 + %13:_(<2 x s16>), %14:_(<2 x s16>) = G_UNMERGE_VALUES %11(<4 x s16>) + $vgpr0 = COPY %13(<2 x s16>) + $vgpr1 = COPY %14(<2 x s16>) + %12:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 + +... +--- +name: test_3xhalf_add_mul_rhs +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: _ } + - { id: 10, class: _ } + - { id: 11, class: _ } + - { id: 12, class: _ } + - { id: 13, class: _ } + - { id: 14, class: _ } + - { id: 15, class: _ } + - { id: 16, class: _ } + - { id: 17, class: _ } + - { id: 18, class: _ } + - { id: 19, class: ccr_sgpr_64 } + - { id: 20, class: _ } + - { id: 21, class: _ } + - { id: 22, class: _ } + - { id: 23, class: _ } + - { id: 24, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_3xhalf_add_mul_rhs + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48) + ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96) + ; GFX9: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48) + ; GFX9: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) + ; GFX9: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]] + ; GFX9: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]] + ; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX9: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX9: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-CONTRACT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-CONTRACT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-CONTRACT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-CONTRACT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96) + ; GFX9-CONTRACT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48) + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-CONTRACT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-CONTRACT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96) + ; GFX9-CONTRACT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48) + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-CONTRACT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) + ; GFX9-CONTRACT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]] + ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]] + ; GFX9-CONTRACT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX9-CONTRACT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-DENORM: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-DENORM: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-DENORM: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-DENORM: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96) + ; GFX9-DENORM: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48) + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-DENORM: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-DENORM: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9-DENORM: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96) + ; GFX9-DENORM: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48) + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-DENORM: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) + ; GFX9-DENORM: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]] + ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]] + ; GFX9-DENORM: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX9-DENORM: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX9-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX9-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96) + ; GFX10: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48) + ; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX10: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96) + ; GFX10: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48) + ; GFX10: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX10: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) + ; GFX10: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]] + ; GFX10: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]] + ; GFX10: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX10: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX10: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX10: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-CONTRACT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX10-CONTRACT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-CONTRACT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-CONTRACT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96) + ; GFX10-CONTRACT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48) + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-CONTRACT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-CONTRACT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX10-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96) + ; GFX10-CONTRACT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48) + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-CONTRACT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) + ; GFX10-CONTRACT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]] + ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]] + ; GFX10-CONTRACT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX10-CONTRACT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-DENORM: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX10-DENORM: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-DENORM: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-DENORM: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96) + ; GFX10-DENORM: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48) + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-DENORM: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-DENORM: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX10-DENORM: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96) + ; GFX10-DENORM: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48) + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-DENORM: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) + ; GFX10-DENORM: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]] + ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]] + ; GFX10-DENORM: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX10-DENORM: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX10-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX10-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + %4:_(<2 x s16>) = COPY $vgpr0 + %5:_(<2 x s16>) = COPY $vgpr1 + %10:_(<2 x s16>) = G_IMPLICIT_DEF + %11:_(<6 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>), %10(<2 x s16>) + %0:_(<3 x s16>), %12:_(<3 x s16>) = G_UNMERGE_VALUES %11(<6 x s16>) + %6:_(<2 x s16>) = COPY $vgpr2 + %7:_(<2 x s16>) = COPY $vgpr3 + %13:_(<6 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>), %10(<2 x s16>) + %1:_(<3 x s16>), %14:_(<3 x s16>) = G_UNMERGE_VALUES %13(<6 x s16>) + %8:_(<2 x s16>) = COPY $vgpr4 + %9:_(<2 x s16>) = COPY $vgpr5 + %15:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>) + %2:_(<3 x s16>), %16:_(<3 x s16>) = G_UNMERGE_VALUES %15(<6 x s16>) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %17:_(<3 x s16>) = reassoc G_FMUL %0, %1 + %18:_(<3 x s16>) = reassoc G_FADD %2, %17 + %22:_(<3 x s16>) = G_IMPLICIT_DEF + %23:_(<6 x s16>) = G_CONCAT_VECTORS %18(<3 x s16>), %22(<3 x s16>) + %20:_(<2 x s16>), %21:_(<2 x s16>), %24:_(<2 x s16>) = G_UNMERGE_VALUES %23(<6 x s16>) + $vgpr0 = COPY %20(<2 x s16>) + $vgpr1 = COPY %21(<2 x s16>) + %19:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %19, implicit $vgpr0, implicit $vgpr1 + +... +--- +name: test_4xdouble_add_mul +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: _ } + - { id: 10, class: _ } + - { id: 11, class: _ } + - { id: 12, class: _ } + - { id: 13, class: _ } + - { id: 14, class: _ } + - { id: 15, class: _ } + - { id: 16, class: _ } + - { id: 17, class: _ } + - { id: 18, class: _ } + - { id: 19, class: _ } + - { id: 20, class: _ } + - { id: 21, class: _ } + - { id: 22, class: _ } + - { id: 23, class: _ } + - { id: 24, class: _ } + - { id: 25, class: _ } + - { id: 26, class: _ } + - { id: 27, class: _ } + - { id: 28, class: _ } + - { id: 29, class: _ } + - { id: 30, class: _ } + - { id: 31, class: _ } + - { id: 32, class: _ } + - { id: 33, class: _ } + - { id: 34, class: _ } + - { id: 35, class: _ } + - { id: 36, class: _ } + - { id: 37, class: _ } + - { id: 38, class: _ } + - { id: 39, class: _ } + - { id: 40, class: _ } + - { id: 41, class: _ } + - { id: 42, class: ccr_sgpr_64 } + - { id: 43, class: _ } + - { id: 44, class: _ } + - { id: 45, class: _ } + - { id: 46, class: _ } + - { id: 47, class: _ } + - { id: 48, class: _ } + - { id: 49, class: _ } + - { id: 50, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_4xdouble_add_mul + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX9: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX9: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX9: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) + ; GFX9: $vgpr0 = COPY [[UV]](s32) + ; GFX9: $vgpr1 = COPY [[UV1]](s32) + ; GFX9: $vgpr2 = COPY [[UV2]](s32) + ; GFX9: $vgpr3 = COPY [[UV3]](s32) + ; GFX9: $vgpr4 = COPY [[UV4]](s32) + ; GFX9: $vgpr5 = COPY [[UV5]](s32) + ; GFX9: $vgpr6 = COPY [[UV6]](s32) + ; GFX9: $vgpr7 = COPY [[UV7]](s32) + ; GFX9: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX9: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9-CONTRACT-LABEL: name: test_4xdouble_add_mul + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-CONTRACT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-CONTRACT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-CONTRACT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-CONTRACT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX9-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-CONTRACT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-CONTRACT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-CONTRACT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-CONTRACT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-CONTRACT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-CONTRACT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-CONTRACT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-CONTRACT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-CONTRACT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX9-CONTRACT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-CONTRACT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-CONTRACT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX9-CONTRACT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX9-CONTRACT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX9-CONTRACT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX9-CONTRACT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX9-CONTRACT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX9-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-CONTRACT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX9-CONTRACT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX9-CONTRACT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9-CONTRACT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) + ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-CONTRACT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-CONTRACT: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-CONTRACT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-CONTRACT: $vgpr6 = COPY [[UV6]](s32) + ; GFX9-CONTRACT: $vgpr7 = COPY [[UV7]](s32) + ; GFX9-CONTRACT: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX9-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-DENORM: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-DENORM: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-DENORM: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-DENORM: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-DENORM: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-DENORM: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX9-DENORM: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-DENORM: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-DENORM: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX9-DENORM: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX9-DENORM: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX9-DENORM: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX9-DENORM: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX9-DENORM: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX9-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-DENORM: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX9-DENORM: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX9-DENORM: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9-DENORM: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) + ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-DENORM: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-DENORM: $vgpr6 = COPY [[UV6]](s32) + ; GFX9-DENORM: $vgpr7 = COPY [[UV7]](s32) + ; GFX9-DENORM: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10-LABEL: name: test_4xdouble_add_mul + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX10: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX10: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX10: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) + ; GFX10: $vgpr0 = COPY [[UV]](s32) + ; GFX10: $vgpr1 = COPY [[UV1]](s32) + ; GFX10: $vgpr2 = COPY [[UV2]](s32) + ; GFX10: $vgpr3 = COPY [[UV3]](s32) + ; GFX10: $vgpr4 = COPY [[UV4]](s32) + ; GFX10: $vgpr5 = COPY [[UV5]](s32) + ; GFX10: $vgpr6 = COPY [[UV6]](s32) + ; GFX10: $vgpr7 = COPY [[UV7]](s32) + ; GFX10: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX10: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10-CONTRACT-LABEL: name: test_4xdouble_add_mul + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-CONTRACT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-CONTRACT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-CONTRACT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-CONTRACT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX10-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-CONTRACT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-CONTRACT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-CONTRACT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-CONTRACT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-CONTRACT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-CONTRACT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-CONTRACT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-CONTRACT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-CONTRACT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX10-CONTRACT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-CONTRACT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-CONTRACT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX10-CONTRACT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX10-CONTRACT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX10-CONTRACT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX10-CONTRACT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX10-CONTRACT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX10-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-CONTRACT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX10-CONTRACT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX10-CONTRACT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10-CONTRACT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) + ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-CONTRACT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-CONTRACT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-CONTRACT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-CONTRACT: $vgpr6 = COPY [[UV6]](s32) + ; GFX10-CONTRACT: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-CONTRACT: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX10-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-DENORM: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-DENORM: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-DENORM: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-DENORM: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-DENORM: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-DENORM: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX10-DENORM: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-DENORM: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-DENORM: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX10-DENORM: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX10-DENORM: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX10-DENORM: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX10-DENORM: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX10-DENORM: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX10-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-DENORM: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX10-DENORM: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX10-DENORM: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10-DENORM: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) + ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-DENORM: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-DENORM: $vgpr6 = COPY [[UV6]](s32) + ; GFX10-DENORM: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-DENORM: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %28:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %29:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %30:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %31:_(s64) = G_MERGE_VALUES %10(s32), %11(s32) + %0:_(<4 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64), %31(s64) + %12:_(s32) = COPY $vgpr8 + %13:_(s32) = COPY $vgpr9 + %14:_(s32) = COPY $vgpr10 + %15:_(s32) = COPY $vgpr11 + %16:_(s32) = COPY $vgpr12 + %17:_(s32) = COPY $vgpr13 + %18:_(s32) = COPY $vgpr14 + %19:_(s32) = COPY $vgpr15 + %32:_(s64) = G_MERGE_VALUES %12(s32), %13(s32) + %33:_(s64) = G_MERGE_VALUES %14(s32), %15(s32) + %34:_(s64) = G_MERGE_VALUES %16(s32), %17(s32) + %35:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) + %1:_(<4 x s64>) = G_BUILD_VECTOR %32(s64), %33(s64), %34(s64), %35(s64) + %20:_(s32) = COPY $vgpr16 + %21:_(s32) = COPY $vgpr17 + %22:_(s32) = COPY $vgpr18 + %23:_(s32) = COPY $vgpr19 + %24:_(s32) = COPY $vgpr20 + %25:_(s32) = COPY $vgpr21 + %26:_(s32) = COPY $vgpr22 + %27:_(s32) = COPY $vgpr23 + %36:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) + %37:_(s64) = G_MERGE_VALUES %22(s32), %23(s32) + %38:_(s64) = G_MERGE_VALUES %24(s32), %25(s32) + %39:_(s64) = G_MERGE_VALUES %26(s32), %27(s32) + %2:_(<4 x s64>) = G_BUILD_VECTOR %36(s64), %37(s64), %38(s64), %39(s64) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %40:_(<4 x s64>) = reassoc G_FMUL %0, %1 + %41:_(<4 x s64>) = reassoc G_FADD %40, %2 + %43:_(s32), %44:_(s32), %45:_(s32), %46:_(s32), %47:_(s32), %48:_(s32), %49:_(s32), %50:_(s32) = G_UNMERGE_VALUES %41(<4 x s64>) + $vgpr0 = COPY %43(s32) + $vgpr1 = COPY %44(s32) + $vgpr2 = COPY %45(s32) + $vgpr3 = COPY %46(s32) + $vgpr4 = COPY %47(s32) + $vgpr5 = COPY %48(s32) + $vgpr6 = COPY %49(s32) + $vgpr7 = COPY %50(s32) + %42:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %42, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + +... +--- +name: test_3xdouble_add_mul_rhs +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: _ } + - { id: 10, class: _ } + - { id: 11, class: _ } + - { id: 12, class: _ } + - { id: 13, class: _ } + - { id: 14, class: _ } + - { id: 15, class: _ } + - { id: 16, class: _ } + - { id: 17, class: _ } + - { id: 18, class: _ } + - { id: 19, class: _ } + - { id: 20, class: _ } + - { id: 21, class: _ } + - { id: 22, class: _ } + - { id: 23, class: _ } + - { id: 24, class: _ } + - { id: 25, class: _ } + - { id: 26, class: _ } + - { id: 27, class: _ } + - { id: 28, class: _ } + - { id: 29, class: _ } + - { id: 30, class: _ } + - { id: 31, class: _ } + - { id: 32, class: _ } + - { id: 33, class: ccr_sgpr_64 } + - { id: 34, class: _ } + - { id: 35, class: _ } + - { id: 36, class: _ } + - { id: 37, class: _ } + - { id: 38, class: _ } + - { id: 39, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_3xdouble_add_mul_rhs + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) + ; GFX9: $vgpr0 = COPY [[UV]](s32) + ; GFX9: $vgpr1 = COPY [[UV1]](s32) + ; GFX9: $vgpr2 = COPY [[UV2]](s32) + ; GFX9: $vgpr3 = COPY [[UV3]](s32) + ; GFX9: $vgpr4 = COPY [[UV4]](s32) + ; GFX9: $vgpr5 = COPY [[UV5]](s32) + ; GFX9: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX9: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-CONTRACT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-CONTRACT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-CONTRACT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-CONTRACT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-CONTRACT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-CONTRACT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-CONTRACT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-CONTRACT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX9-CONTRACT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-CONTRACT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-CONTRACT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-CONTRACT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-CONTRACT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-CONTRACT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-CONTRACT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9-CONTRACT: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) + ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-CONTRACT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-CONTRACT: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-CONTRACT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-CONTRACT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-DENORM: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-DENORM: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-DENORM: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX9-DENORM: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-DENORM: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-DENORM: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-DENORM: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-DENORM: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-DENORM: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-DENORM: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9-DENORM: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) + ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-DENORM: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-DENORM: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-LABEL: name: test_3xdouble_add_mul_rhs + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) + ; GFX10: $vgpr0 = COPY [[UV]](s32) + ; GFX10: $vgpr1 = COPY [[UV1]](s32) + ; GFX10: $vgpr2 = COPY [[UV2]](s32) + ; GFX10: $vgpr3 = COPY [[UV3]](s32) + ; GFX10: $vgpr4 = COPY [[UV4]](s32) + ; GFX10: $vgpr5 = COPY [[UV5]](s32) + ; GFX10: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX10: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-CONTRACT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-CONTRACT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-CONTRACT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-CONTRACT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-CONTRACT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-CONTRACT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-CONTRACT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-CONTRACT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX10-CONTRACT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-CONTRACT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-CONTRACT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-CONTRACT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-CONTRACT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-CONTRACT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-CONTRACT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10-CONTRACT: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) + ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-CONTRACT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-CONTRACT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-CONTRACT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-CONTRACT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-DENORM: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-DENORM: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-DENORM: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX10-DENORM: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-DENORM: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-DENORM: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-DENORM: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-DENORM: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-DENORM: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-DENORM: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10-DENORM: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) + ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-DENORM: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-DENORM: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %22:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %23:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %24:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %0:_(<3 x s64>) = G_BUILD_VECTOR %22(s64), %23(s64), %24(s64) + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %12:_(s32) = COPY $vgpr8 + %13:_(s32) = COPY $vgpr9 + %14:_(s32) = COPY $vgpr10 + %15:_(s32) = COPY $vgpr11 + %25:_(s64) = G_MERGE_VALUES %10(s32), %11(s32) + %26:_(s64) = G_MERGE_VALUES %12(s32), %13(s32) + %27:_(s64) = G_MERGE_VALUES %14(s32), %15(s32) + %1:_(<3 x s64>) = G_BUILD_VECTOR %25(s64), %26(s64), %27(s64) + %16:_(s32) = COPY $vgpr12 + %17:_(s32) = COPY $vgpr13 + %18:_(s32) = COPY $vgpr14 + %19:_(s32) = COPY $vgpr15 + %20:_(s32) = COPY $vgpr16 + %21:_(s32) = COPY $vgpr17 + %28:_(s64) = G_MERGE_VALUES %16(s32), %17(s32) + %29:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) + %30:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) + %2:_(<3 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %31:_(<3 x s64>) = reassoc G_FMUL %0, %1 + %32:_(<3 x s64>) = reassoc G_FADD %2, %31 + %34:_(s32), %35:_(s32), %36:_(s32), %37:_(s32), %38:_(s32), %39:_(s32) = G_UNMERGE_VALUES %32(<3 x s64>) + $vgpr0 = COPY %34(s32) + $vgpr1 = COPY %35(s32) + $vgpr2 = COPY %36(s32) + $vgpr3 = COPY %37(s32) + $vgpr4 = COPY %38(s32) + $vgpr5 = COPY %39(s32) + %33:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %33, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir @@ -0,0 +1,2444 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -fp-contract=fast %s -o - | FileCheck -check-prefix=GFX9-CONTRACT %s +# RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner --denormal-fp-math=preserve-sign %s -o - | FileCheck -check-prefix=GFX9-DENORM %s +# RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s +# RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner -fp-contract=fast %s -o - | FileCheck -check-prefix=GFX10-CONTRACT %s +# RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner --denormal-fp-math=preserve-sign %s -o - | FileCheck -check-prefix=GFX10-DENORM %s + +--- +name: test_f32_add_mul +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: ccr_sgpr_64 } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_f32_add_mul + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9: $vgpr0 = COPY [[FMA]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-CONTRACT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM-LABEL: name: test_f32_add_mul + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-DENORM: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-LABEL: name: test_f32_add_mul + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10: $vgpr0 = COPY [[FMA]](s32) + ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-CONTRACT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM-LABEL: name: test_f32_add_mul + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-DENORM: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %4:_(s32) = reassoc G_FMUL %0, %1 + %5:_(s32) = reassoc G_FADD %4, %2 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %6, implicit $vgpr0 + +... +--- +name: test_f32_add_mul_rhs +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: ccr_sgpr_64 } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_f32_add_mul_rhs + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9: $vgpr0 = COPY [[FMA]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul_rhs + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-CONTRACT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-DENORM: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-LABEL: name: test_f32_add_mul_rhs + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10: $vgpr0 = COPY [[FMA]](s32) + ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul_rhs + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-CONTRACT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-DENORM: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %4:_(s32) = reassoc G_FMUL %0, %1 + %5:_(s32) = reassoc G_FADD %2, %4 + $vgpr0 = COPY %5(s32) + %6:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %6, implicit $vgpr0 + +... +--- +name: test_half_add_mul +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: ccr_sgpr_64 } + - { id: 10, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_half_add_mul + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT-LABEL: name: test_half_add_mul + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM-LABEL: name: test_half_add_mul + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-LABEL: name: test_half_add_mul + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT-LABEL: name: test_half_add_mul + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM-LABEL: name: test_half_add_mul + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + %4:_(s32) = COPY $vgpr0 + %0:_(s16) = G_TRUNC %4(s32) + %5:_(s32) = COPY $vgpr1 + %1:_(s16) = G_TRUNC %5(s32) + %6:_(s32) = COPY $vgpr2 + %2:_(s16) = G_TRUNC %6(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %7:_(s16) = reassoc G_FMUL %0, %1 + %8:_(s16) = reassoc G_FADD %7, %2 + %10:_(s32) = G_ANYEXT %8(s16) + $vgpr0 = COPY %10(s32) + %9:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %9, implicit $vgpr0 + +... +--- +name: test_half_add_mul_rhs +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: ccr_sgpr_64 } + - { id: 10, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_half_add_mul_rhs + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT-LABEL: name: test_half_add_mul_rhs + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-LABEL: name: test_half_add_mul_rhs + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT-LABEL: name: test_half_add_mul_rhs + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + %4:_(s32) = COPY $vgpr0 + %0:_(s16) = G_TRUNC %4(s32) + %5:_(s32) = COPY $vgpr1 + %1:_(s16) = G_TRUNC %5(s32) + %6:_(s32) = COPY $vgpr2 + %2:_(s16) = G_TRUNC %6(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %7:_(s16) = reassoc G_FMUL %0, %1 + %8:_(s16) = reassoc G_FADD %2, %7 + %10:_(s32) = G_ANYEXT %8(s16) + $vgpr0 = COPY %10(s32) + %9:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %9, implicit $vgpr0 + +... +--- +name: test_double_add_mul +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: _ } + - { id: 10, class: _ } + - { id: 11, class: _ } + - { id: 12, class: ccr_sgpr_64 } + - { id: 13, class: _ } + - { id: 14, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_double_add_mul + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9: $vgpr0 = COPY [[UV]](s32) + ; GFX9: $vgpr1 = COPY [[UV1]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT-LABEL: name: test_double_add_mul + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-CONTRACT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM-LABEL: name: test_double_add_mul + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-LABEL: name: test_double_add_mul + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10: $vgpr0 = COPY [[UV]](s32) + ; GFX10: $vgpr1 = COPY [[UV1]](s32) + ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT-LABEL: name: test_double_add_mul + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-CONTRACT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM-LABEL: name: test_double_add_mul + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %10:_(s64) = reassoc G_FMUL %0, %1 + %11:_(s64) = reassoc G_FADD %10, %2 + %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) + $vgpr0 = COPY %13(s32) + $vgpr1 = COPY %14(s32) + %12:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 + +... +--- +name: test_double_add_mul_rhs +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: _ } + - { id: 10, class: _ } + - { id: 11, class: _ } + - { id: 12, class: ccr_sgpr_64 } + - { id: 13, class: _ } + - { id: 14, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_double_add_mul_rhs + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9: $vgpr0 = COPY [[UV]](s32) + ; GFX9: $vgpr1 = COPY [[UV1]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT-LABEL: name: test_double_add_mul_rhs + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-CONTRACT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-LABEL: name: test_double_add_mul_rhs + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10: $vgpr0 = COPY [[UV]](s32) + ; GFX10: $vgpr1 = COPY [[UV1]](s32) + ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT-LABEL: name: test_double_add_mul_rhs + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-CONTRACT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %10:_(s64) = reassoc G_FMUL %0, %1 + %11:_(s64) = reassoc G_FADD %2, %10 + %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) + $vgpr0 = COPY %13(s32) + $vgpr1 = COPY %14(s32) + %12:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 + +... +--- +name: test_4xfloat_add_mul +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: _ } + - { id: 10, class: _ } + - { id: 11, class: _ } + - { id: 12, class: _ } + - { id: 13, class: _ } + - { id: 14, class: _ } + - { id: 15, class: _ } + - { id: 16, class: _ } + - { id: 17, class: _ } + - { id: 18, class: ccr_sgpr_64 } + - { id: 19, class: _ } + - { id: 20, class: _ } + - { id: 21, class: _ } + - { id: 22, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_4xfloat_add_mul + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) + ; GFX9: $vgpr0 = COPY [[UV]](s32) + ; GFX9: $vgpr1 = COPY [[UV1]](s32) + ; GFX9: $vgpr2 = COPY [[UV2]](s32) + ; GFX9: $vgpr3 = COPY [[UV3]](s32) + ; GFX9: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX9: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-CONTRACT-LABEL: name: test_4xfloat_add_mul + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-CONTRACT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-CONTRACT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-CONTRACT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-CONTRACT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) + ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-CONTRACT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-CONTRACT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) + ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-LABEL: name: test_4xfloat_add_mul + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) + ; GFX10: $vgpr0 = COPY [[UV]](s32) + ; GFX10: $vgpr1 = COPY [[UV1]](s32) + ; GFX10: $vgpr2 = COPY [[UV2]](s32) + ; GFX10: $vgpr3 = COPY [[UV3]](s32) + ; GFX10: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX10: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-CONTRACT-LABEL: name: test_4xfloat_add_mul + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-CONTRACT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-CONTRACT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-CONTRACT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-CONTRACT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) + ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-CONTRACT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-CONTRACT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) + ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %0:_(<4 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32), %7(s32) + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %1:_(<4 x s32>) = G_BUILD_VECTOR %8(s32), %9(s32), %10(s32), %11(s32) + %12:_(s32) = COPY $vgpr8 + %13:_(s32) = COPY $vgpr9 + %14:_(s32) = COPY $vgpr10 + %15:_(s32) = COPY $vgpr11 + %2:_(<4 x s32>) = G_BUILD_VECTOR %12(s32), %13(s32), %14(s32), %15(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %16:_(<4 x s32>) = reassoc G_FMUL %0, %1 + %17:_(<4 x s32>) = reassoc G_FADD %16, %2 + %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %17(<4 x s32>) + $vgpr0 = COPY %19(s32) + $vgpr1 = COPY %20(s32) + $vgpr2 = COPY %21(s32) + $vgpr3 = COPY %22(s32) + %18:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %18, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + +... +--- +name: test_3xfloat_add_mul_rhs +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: _ } + - { id: 10, class: _ } + - { id: 11, class: _ } + - { id: 12, class: _ } + - { id: 13, class: _ } + - { id: 14, class: _ } + - { id: 15, class: ccr_sgpr_64 } + - { id: 16, class: _ } + - { id: 17, class: _ } + - { id: 18, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_3xfloat_add_mul_rhs + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) + ; GFX9: $vgpr0 = COPY [[UV]](s32) + ; GFX9: $vgpr1 = COPY [[UV1]](s32) + ; GFX9: $vgpr2 = COPY [[UV2]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX9: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-CONTRACT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-CONTRACT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) + ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-DENORM: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) + ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-LABEL: name: test_3xfloat_add_mul_rhs + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) + ; GFX10: $vgpr0 = COPY [[UV]](s32) + ; GFX10: $vgpr1 = COPY [[UV1]](s32) + ; GFX10: $vgpr2 = COPY [[UV2]](s32) + ; GFX10: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX10: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-CONTRACT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10-CONTRACT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) + ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10-DENORM: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) + ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %0:_(<3 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32) + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %1:_(<3 x s32>) = G_BUILD_VECTOR %7(s32), %8(s32), %9(s32) + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %12:_(s32) = COPY $vgpr8 + %2:_(<3 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32), %12(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %13:_(<3 x s32>) = reassoc G_FMUL %0, %1 + %14:_(<3 x s32>) = reassoc G_FADD %2, %13 + %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(<3 x s32>) + $vgpr0 = COPY %16(s32) + $vgpr1 = COPY %17(s32) + $vgpr2 = COPY %18(s32) + %15:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %15, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + +... +--- +name: test_4xhalf_add_mul +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: _ } + - { id: 10, class: _ } + - { id: 11, class: _ } + - { id: 12, class: ccr_sgpr_64 } + - { id: 13, class: _ } + - { id: 14, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_4xhalf_add_mul + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX9: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] + ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) + ; GFX9: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX9: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT-LABEL: name: test_4xhalf_add_mul + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-CONTRACT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-CONTRACT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] + ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) + ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-DENORM: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-DENORM: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] + ; GFX9-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) + ; GFX9-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-LABEL: name: test_4xhalf_add_mul + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX10: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] + ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) + ; GFX10: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX10: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT-LABEL: name: test_4xhalf_add_mul + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-CONTRACT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-CONTRACT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] + ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) + ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-DENORM: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-DENORM: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] + ; GFX10-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) + ; GFX10-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + %4:_(<2 x s16>) = COPY $vgpr0 + %5:_(<2 x s16>) = COPY $vgpr1 + %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) + %6:_(<2 x s16>) = COPY $vgpr2 + %7:_(<2 x s16>) = COPY $vgpr3 + %1:_(<4 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>) + %8:_(<2 x s16>) = COPY $vgpr4 + %9:_(<2 x s16>) = COPY $vgpr5 + %2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %10:_(<4 x s16>) = reassoc G_FMUL %0, %1 + %11:_(<4 x s16>) = reassoc G_FADD %10, %2 + %13:_(<2 x s16>), %14:_(<2 x s16>) = G_UNMERGE_VALUES %11(<4 x s16>) + $vgpr0 = COPY %13(<2 x s16>) + $vgpr1 = COPY %14(<2 x s16>) + %12:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 + +... +--- +name: test_3xhalf_add_mul_rhs +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: _ } + - { id: 10, class: _ } + - { id: 11, class: _ } + - { id: 12, class: _ } + - { id: 13, class: _ } + - { id: 14, class: _ } + - { id: 15, class: _ } + - { id: 16, class: _ } + - { id: 17, class: _ } + - { id: 18, class: _ } + - { id: 19, class: ccr_sgpr_64 } + - { id: 20, class: _ } + - { id: 21, class: _ } + - { id: 22, class: _ } + - { id: 23, class: _ } + - { id: 24, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_3xhalf_add_mul_rhs + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48) + ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96) + ; GFX9: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48) + ; GFX9: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) + ; GFX9: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]] + ; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX9: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX9: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-CONTRACT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-CONTRACT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-CONTRACT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-CONTRACT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96) + ; GFX9-CONTRACT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48) + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-CONTRACT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-CONTRACT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96) + ; GFX9-CONTRACT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48) + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-CONTRACT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) + ; GFX9-CONTRACT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]] + ; GFX9-CONTRACT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX9-CONTRACT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-DENORM: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-DENORM: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-DENORM: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-DENORM: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96) + ; GFX9-DENORM: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48) + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-DENORM: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-DENORM: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9-DENORM: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96) + ; GFX9-DENORM: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48) + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-DENORM: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) + ; GFX9-DENORM: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]] + ; GFX9-DENORM: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX9-DENORM: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX9-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX9-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96) + ; GFX10: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48) + ; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX10: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96) + ; GFX10: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48) + ; GFX10: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX10: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) + ; GFX10: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]] + ; GFX10: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX10: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX10: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX10: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-CONTRACT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX10-CONTRACT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-CONTRACT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-CONTRACT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96) + ; GFX10-CONTRACT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48) + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-CONTRACT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-CONTRACT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX10-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96) + ; GFX10-CONTRACT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48) + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-CONTRACT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) + ; GFX10-CONTRACT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]] + ; GFX10-CONTRACT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX10-CONTRACT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-DENORM: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX10-DENORM: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-DENORM: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-DENORM: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96) + ; GFX10-DENORM: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48) + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-DENORM: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-DENORM: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX10-DENORM: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96) + ; GFX10-DENORM: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48) + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-DENORM: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) + ; GFX10-DENORM: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]] + ; GFX10-DENORM: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX10-DENORM: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) + ; GFX10-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX10-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + %4:_(<2 x s16>) = COPY $vgpr0 + %5:_(<2 x s16>) = COPY $vgpr1 + %10:_(<2 x s16>) = G_IMPLICIT_DEF + %11:_(<6 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>), %10(<2 x s16>) + %0:_(<3 x s16>), %12:_(<3 x s16>) = G_UNMERGE_VALUES %11(<6 x s16>) + %6:_(<2 x s16>) = COPY $vgpr2 + %7:_(<2 x s16>) = COPY $vgpr3 + %13:_(<6 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>), %10(<2 x s16>) + %1:_(<3 x s16>), %14:_(<3 x s16>) = G_UNMERGE_VALUES %13(<6 x s16>) + %8:_(<2 x s16>) = COPY $vgpr4 + %9:_(<2 x s16>) = COPY $vgpr5 + %15:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>) + %2:_(<3 x s16>), %16:_(<3 x s16>) = G_UNMERGE_VALUES %15(<6 x s16>) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %17:_(<3 x s16>) = reassoc G_FMUL %0, %1 + %18:_(<3 x s16>) = reassoc G_FADD %2, %17 + %22:_(<3 x s16>) = G_IMPLICIT_DEF + %23:_(<6 x s16>) = G_CONCAT_VECTORS %18(<3 x s16>), %22(<3 x s16>) + %20:_(<2 x s16>), %21:_(<2 x s16>), %24:_(<2 x s16>) = G_UNMERGE_VALUES %23(<6 x s16>) + $vgpr0 = COPY %20(<2 x s16>) + $vgpr1 = COPY %21(<2 x s16>) + %19:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %19, implicit $vgpr0, implicit $vgpr1 + +... +--- +name: test_4xdouble_add_mul +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: _ } + - { id: 10, class: _ } + - { id: 11, class: _ } + - { id: 12, class: _ } + - { id: 13, class: _ } + - { id: 14, class: _ } + - { id: 15, class: _ } + - { id: 16, class: _ } + - { id: 17, class: _ } + - { id: 18, class: _ } + - { id: 19, class: _ } + - { id: 20, class: _ } + - { id: 21, class: _ } + - { id: 22, class: _ } + - { id: 23, class: _ } + - { id: 24, class: _ } + - { id: 25, class: _ } + - { id: 26, class: _ } + - { id: 27, class: _ } + - { id: 28, class: _ } + - { id: 29, class: _ } + - { id: 30, class: _ } + - { id: 31, class: _ } + - { id: 32, class: _ } + - { id: 33, class: _ } + - { id: 34, class: _ } + - { id: 35, class: _ } + - { id: 36, class: _ } + - { id: 37, class: _ } + - { id: 38, class: _ } + - { id: 39, class: _ } + - { id: 40, class: _ } + - { id: 41, class: _ } + - { id: 42, class: ccr_sgpr_64 } + - { id: 43, class: _ } + - { id: 44, class: _ } + - { id: 45, class: _ } + - { id: 46, class: _ } + - { id: 47, class: _ } + - { id: 48, class: _ } + - { id: 49, class: _ } + - { id: 50, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_4xdouble_add_mul + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX9: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX9: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX9: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) + ; GFX9: $vgpr0 = COPY [[UV]](s32) + ; GFX9: $vgpr1 = COPY [[UV1]](s32) + ; GFX9: $vgpr2 = COPY [[UV2]](s32) + ; GFX9: $vgpr3 = COPY [[UV3]](s32) + ; GFX9: $vgpr4 = COPY [[UV4]](s32) + ; GFX9: $vgpr5 = COPY [[UV5]](s32) + ; GFX9: $vgpr6 = COPY [[UV6]](s32) + ; GFX9: $vgpr7 = COPY [[UV7]](s32) + ; GFX9: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX9: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9-CONTRACT-LABEL: name: test_4xdouble_add_mul + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-CONTRACT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-CONTRACT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-CONTRACT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-CONTRACT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX9-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-CONTRACT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-CONTRACT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-CONTRACT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-CONTRACT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-CONTRACT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-CONTRACT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-CONTRACT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-CONTRACT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-CONTRACT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX9-CONTRACT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-CONTRACT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-CONTRACT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX9-CONTRACT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX9-CONTRACT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX9-CONTRACT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX9-CONTRACT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX9-CONTRACT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX9-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-CONTRACT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX9-CONTRACT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX9-CONTRACT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9-CONTRACT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) + ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-CONTRACT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-CONTRACT: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-CONTRACT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-CONTRACT: $vgpr6 = COPY [[UV6]](s32) + ; GFX9-CONTRACT: $vgpr7 = COPY [[UV7]](s32) + ; GFX9-CONTRACT: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX9-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-DENORM: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-DENORM: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-DENORM: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-DENORM: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-DENORM: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-DENORM: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX9-DENORM: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-DENORM: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-DENORM: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX9-DENORM: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX9-DENORM: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX9-DENORM: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX9-DENORM: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX9-DENORM: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX9-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-DENORM: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX9-DENORM: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX9-DENORM: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9-DENORM: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) + ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-DENORM: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-DENORM: $vgpr6 = COPY [[UV6]](s32) + ; GFX9-DENORM: $vgpr7 = COPY [[UV7]](s32) + ; GFX9-DENORM: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10-LABEL: name: test_4xdouble_add_mul + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX10: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX10: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX10: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) + ; GFX10: $vgpr0 = COPY [[UV]](s32) + ; GFX10: $vgpr1 = COPY [[UV1]](s32) + ; GFX10: $vgpr2 = COPY [[UV2]](s32) + ; GFX10: $vgpr3 = COPY [[UV3]](s32) + ; GFX10: $vgpr4 = COPY [[UV4]](s32) + ; GFX10: $vgpr5 = COPY [[UV5]](s32) + ; GFX10: $vgpr6 = COPY [[UV6]](s32) + ; GFX10: $vgpr7 = COPY [[UV7]](s32) + ; GFX10: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX10: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10-CONTRACT-LABEL: name: test_4xdouble_add_mul + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-CONTRACT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-CONTRACT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-CONTRACT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-CONTRACT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX10-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-CONTRACT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-CONTRACT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-CONTRACT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-CONTRACT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-CONTRACT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-CONTRACT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-CONTRACT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-CONTRACT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-CONTRACT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX10-CONTRACT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-CONTRACT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-CONTRACT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX10-CONTRACT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX10-CONTRACT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX10-CONTRACT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX10-CONTRACT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX10-CONTRACT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX10-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-CONTRACT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX10-CONTRACT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX10-CONTRACT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10-CONTRACT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) + ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-CONTRACT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-CONTRACT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-CONTRACT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-CONTRACT: $vgpr6 = COPY [[UV6]](s32) + ; GFX10-CONTRACT: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-CONTRACT: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX10-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-DENORM: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-DENORM: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-DENORM: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-DENORM: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-DENORM: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-DENORM: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX10-DENORM: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-DENORM: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-DENORM: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX10-DENORM: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX10-DENORM: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX10-DENORM: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX10-DENORM: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX10-DENORM: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX10-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-DENORM: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX10-DENORM: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX10-DENORM: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10-DENORM: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) + ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-DENORM: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-DENORM: $vgpr6 = COPY [[UV6]](s32) + ; GFX10-DENORM: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-DENORM: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %28:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %29:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %30:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %31:_(s64) = G_MERGE_VALUES %10(s32), %11(s32) + %0:_(<4 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64), %31(s64) + %12:_(s32) = COPY $vgpr8 + %13:_(s32) = COPY $vgpr9 + %14:_(s32) = COPY $vgpr10 + %15:_(s32) = COPY $vgpr11 + %16:_(s32) = COPY $vgpr12 + %17:_(s32) = COPY $vgpr13 + %18:_(s32) = COPY $vgpr14 + %19:_(s32) = COPY $vgpr15 + %32:_(s64) = G_MERGE_VALUES %12(s32), %13(s32) + %33:_(s64) = G_MERGE_VALUES %14(s32), %15(s32) + %34:_(s64) = G_MERGE_VALUES %16(s32), %17(s32) + %35:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) + %1:_(<4 x s64>) = G_BUILD_VECTOR %32(s64), %33(s64), %34(s64), %35(s64) + %20:_(s32) = COPY $vgpr16 + %21:_(s32) = COPY $vgpr17 + %22:_(s32) = COPY $vgpr18 + %23:_(s32) = COPY $vgpr19 + %24:_(s32) = COPY $vgpr20 + %25:_(s32) = COPY $vgpr21 + %26:_(s32) = COPY $vgpr22 + %27:_(s32) = COPY $vgpr23 + %36:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) + %37:_(s64) = G_MERGE_VALUES %22(s32), %23(s32) + %38:_(s64) = G_MERGE_VALUES %24(s32), %25(s32) + %39:_(s64) = G_MERGE_VALUES %26(s32), %27(s32) + %2:_(<4 x s64>) = G_BUILD_VECTOR %36(s64), %37(s64), %38(s64), %39(s64) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %40:_(<4 x s64>) = reassoc G_FMUL %0, %1 + %41:_(<4 x s64>) = reassoc G_FADD %40, %2 + %43:_(s32), %44:_(s32), %45:_(s32), %46:_(s32), %47:_(s32), %48:_(s32), %49:_(s32), %50:_(s32) = G_UNMERGE_VALUES %41(<4 x s64>) + $vgpr0 = COPY %43(s32) + $vgpr1 = COPY %44(s32) + $vgpr2 = COPY %45(s32) + $vgpr3 = COPY %46(s32) + $vgpr4 = COPY %47(s32) + $vgpr5 = COPY %48(s32) + $vgpr6 = COPY %49(s32) + $vgpr7 = COPY %50(s32) + %42:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %42, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + +... +--- +name: test_3xdouble_add_mul_rhs +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: sgpr_64 } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } + - { id: 9, class: _ } + - { id: 10, class: _ } + - { id: 11, class: _ } + - { id: 12, class: _ } + - { id: 13, class: _ } + - { id: 14, class: _ } + - { id: 15, class: _ } + - { id: 16, class: _ } + - { id: 17, class: _ } + - { id: 18, class: _ } + - { id: 19, class: _ } + - { id: 20, class: _ } + - { id: 21, class: _ } + - { id: 22, class: _ } + - { id: 23, class: _ } + - { id: 24, class: _ } + - { id: 25, class: _ } + - { id: 26, class: _ } + - { id: 27, class: _ } + - { id: 28, class: _ } + - { id: 29, class: _ } + - { id: 30, class: _ } + - { id: 31, class: _ } + - { id: 32, class: _ } + - { id: 33, class: ccr_sgpr_64 } + - { id: 34, class: _ } + - { id: 35, class: _ } + - { id: 36, class: _ } + - { id: 37, class: _ } + - { id: 38, class: _ } + - { id: 39, class: _ } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31 + + ; GFX9-LABEL: name: test_3xdouble_add_mul_rhs + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) + ; GFX9: $vgpr0 = COPY [[UV]](s32) + ; GFX9: $vgpr1 = COPY [[UV1]](s32) + ; GFX9: $vgpr2 = COPY [[UV2]](s32) + ; GFX9: $vgpr3 = COPY [[UV3]](s32) + ; GFX9: $vgpr4 = COPY [[UV4]](s32) + ; GFX9: $vgpr5 = COPY [[UV5]](s32) + ; GFX9: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX9: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs + ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-CONTRACT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-CONTRACT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-CONTRACT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-CONTRACT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-CONTRACT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-CONTRACT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-CONTRACT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-CONTRACT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX9-CONTRACT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-CONTRACT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-CONTRACT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-CONTRACT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-CONTRACT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-CONTRACT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-CONTRACT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9-CONTRACT: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) + ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-CONTRACT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-CONTRACT: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-CONTRACT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-CONTRACT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31 + ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-DENORM: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-DENORM: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-DENORM: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX9-DENORM: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-DENORM: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-DENORM: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-DENORM: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-DENORM: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-DENORM: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-DENORM: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9-DENORM: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-DENORM: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) + ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-DENORM: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-DENORM: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-LABEL: name: test_3xdouble_add_mul_rhs + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31 + ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) + ; GFX10: $vgpr0 = COPY [[UV]](s32) + ; GFX10: $vgpr1 = COPY [[UV1]](s32) + ; GFX10: $vgpr2 = COPY [[UV2]](s32) + ; GFX10: $vgpr3 = COPY [[UV3]](s32) + ; GFX10: $vgpr4 = COPY [[UV4]](s32) + ; GFX10: $vgpr5 = COPY [[UV5]](s32) + ; GFX10: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX10: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs + ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-CONTRACT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-CONTRACT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-CONTRACT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-CONTRACT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-CONTRACT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-CONTRACT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-CONTRACT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-CONTRACT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX10-CONTRACT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-CONTRACT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-CONTRACT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-CONTRACT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-CONTRACT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-CONTRACT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-CONTRACT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10-CONTRACT: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) + ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-CONTRACT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-CONTRACT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-CONTRACT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-CONTRACT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31 + ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-DENORM: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-DENORM: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-DENORM: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX10-DENORM: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-DENORM: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-DENORM: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-DENORM: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-DENORM: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-DENORM: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-DENORM: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10-DENORM: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX10-DENORM: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) + ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-DENORM: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-DENORM: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %22:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %23:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %24:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %0:_(<3 x s64>) = G_BUILD_VECTOR %22(s64), %23(s64), %24(s64) + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %12:_(s32) = COPY $vgpr8 + %13:_(s32) = COPY $vgpr9 + %14:_(s32) = COPY $vgpr10 + %15:_(s32) = COPY $vgpr11 + %25:_(s64) = G_MERGE_VALUES %10(s32), %11(s32) + %26:_(s64) = G_MERGE_VALUES %12(s32), %13(s32) + %27:_(s64) = G_MERGE_VALUES %14(s32), %15(s32) + %1:_(<3 x s64>) = G_BUILD_VECTOR %25(s64), %26(s64), %27(s64) + %16:_(s32) = COPY $vgpr12 + %17:_(s32) = COPY $vgpr13 + %18:_(s32) = COPY $vgpr14 + %19:_(s32) = COPY $vgpr15 + %20:_(s32) = COPY $vgpr16 + %21:_(s32) = COPY $vgpr17 + %28:_(s64) = G_MERGE_VALUES %16(s32), %17(s32) + %29:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) + %30:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) + %2:_(<3 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %31:_(<3 x s64>) = reassoc G_FMUL %0, %1 + %32:_(<3 x s64>) = reassoc G_FADD %2, %31 + %34:_(s32), %35:_(s32), %36:_(s32), %37:_(s32), %38:_(s32), %39:_(s32) = G_UNMERGE_VALUES %32(<3 x s64>) + $vgpr0 = COPY %34(s32) + $vgpr1 = COPY %35(s32) + $vgpr2 = COPY %36(s32) + $vgpr3 = COPY %37(s32) + $vgpr4 = COPY %38(s32) + $vgpr5 = COPY %39(s32) + %33:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %33, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll @@ -0,0 +1,871 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -fp-contract=fast < %s | FileCheck -check-prefix=GFX9-CONTRACT %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -fp-contract=fast < %s | FileCheck -check-prefix=GFX10-CONTRACT %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s + +; fold (fadd (fmul x, y), z) -> (fma x, y, z) +; fold (fadd x, (fmul y, z)) -> (fma y, z, x) + +define float @test_f32_add_mul(float %x, float %y, float %z) { +; GFX9-LABEL: test_f32_add_mul: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_f32_add_mul: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_f32_add_mul: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_f32_add_mul: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_f32_add_mul: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_f32_add_mul: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul reassoc float %x, %y + %b = fadd reassoc float %a, %z + ret float %b +} + +define float @test_f32_add_mul_rhs(float %x, float %y, float %z) { +; GFX9-LABEL: test_f32_add_mul_rhs: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_add_f32_e32 v0, v2, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_f32_add_mul_rhs: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_f32_add_mul_rhs: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_f32_add_mul_rhs: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: v_add_f32_e32 v0, v2, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_f32_add_mul_rhs: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_f32_add_mul_rhs: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul reassoc float %x, %y + %b = fadd reassoc float %z, %a + ret float %b +} + +define half @test_half_add_mul(half %x, half %y, half %z) { +; GFX9-LABEL: test_half_add_mul: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: v_add_f16_e32 v0, v0, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_half_add_mul: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_half_add_mul: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, v1, v2 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_half_add_mul: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX10-NEXT: v_add_f16_e32 v0, v0, v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_half_add_mul: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CONTRACT-NEXT: v_fmac_f16_e32 v2, v0, v1 +; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v0, v2 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_half_add_mul: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v0, v2 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul reassoc half %x, %y + %b = fadd reassoc half %a, %z + ret half %b +} + +define half @test_half_add_mul_rhs(half %x, half %y, half %z) { +; GFX9-LABEL: test_half_add_mul_rhs: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: v_add_f16_e32 v0, v2, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_half_add_mul_rhs: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_half_add_mul_rhs: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, v1, v2 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_half_add_mul_rhs: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX10-NEXT: v_add_f16_e32 v0, v2, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_half_add_mul_rhs: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CONTRACT-NEXT: v_fmac_f16_e32 v2, v0, v1 +; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v0, v2 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_half_add_mul_rhs: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v2, v0 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul reassoc half %x, %y + %b = fadd reassoc half %z, %a + ret half %b +} + +define double @test_double_add_mul(double %x, double %y, double %z) { +; GFX9-LABEL: test_double_add_mul: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_double_add_mul: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_double_add_mul: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5] +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_double_add_mul: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3] +; GFX10-NEXT: v_add_f64 v[0:1], v[2:3], v[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_double_add_mul: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_double_add_mul: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3] +; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[2:3], v[4:5] +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul reassoc double %x, %y + %b = fadd reassoc double %a, %z + ret double %b +} + +define double @test_double_add_mul_rhs(double %x, double %y, double %z) { +; GFX9-LABEL: test_double_add_mul_rhs: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: v_add_f64 v[0:1], v[4:5], v[0:1] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_double_add_mul_rhs: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_double_add_mul_rhs: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], v[0:1] +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_double_add_mul_rhs: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX10-NEXT: v_mov_b32_e32 v6, v4 +; GFX10-NEXT: v_mov_b32_e32 v7, v5 +; GFX10-NEXT: v_add_f64 v[0:1], v[6:7], v[0:1] +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_double_add_mul_rhs: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_double_add_mul_rhs: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX10-DENORM-NEXT: v_mov_b32_e32 v6, v4 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v7, v5 +; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[6:7], v[0:1] +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul reassoc double %x, %y + %b = fadd reassoc double %z, %a + ret double %b +} + +define <4 x float> @test_4xfloat_add_mul(<4 x float> %x, <4 x float> %y, <4 x float> %z) { +; GFX9-LABEL: test_4xfloat_add_mul: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v4 +; GFX9-NEXT: v_mul_f32_e32 v1, v1, v5 +; GFX9-NEXT: v_mul_f32_e32 v2, v2, v6 +; GFX9-NEXT: v_mul_f32_e32 v3, v3, v7 +; GFX9-NEXT: v_add_f32_e32 v0, v0, v8 +; GFX9-NEXT: v_add_f32_e32 v1, v1, v9 +; GFX9-NEXT: v_add_f32_e32 v2, v2, v10 +; GFX9-NEXT: v_add_f32_e32 v3, v3, v11 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_4xfloat_add_mul: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, v8 +; GFX9-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, v9 +; GFX9-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, v10 +; GFX9-CONTRACT-NEXT: v_fma_f32 v3, v3, v7, v11 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_4xfloat_add_mul: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v4, v8 +; GFX9-DENORM-NEXT: v_mad_f32 v1, v1, v5, v9 +; GFX9-DENORM-NEXT: v_mad_f32 v2, v2, v6, v10 +; GFX9-DENORM-NEXT: v_mad_f32 v3, v3, v7, v11 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_4xfloat_add_mul: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_mul_f32_e32 v23, v2, v6 +; GFX10-NEXT: v_mul_f32_e32 v15, v0, v4 +; GFX10-NEXT: v_mul_f32_e32 v19, v1, v5 +; GFX10-NEXT: v_mul_f32_e32 v6, v3, v7 +; GFX10-NEXT: v_add_f32_e32 v2, v23, v10 +; GFX10-NEXT: v_add_f32_e32 v0, v15, v8 +; GFX10-NEXT: v_add_f32_e32 v1, v19, v9 +; GFX10-NEXT: v_add_f32_e32 v3, v6, v11 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_4xfloat_add_mul: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, v8 +; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, v9 +; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, v10 +; GFX10-CONTRACT-NEXT: v_fma_f32 v3, v3, v7, v11 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_4xfloat_add_mul: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v4, v8 +; GFX10-DENORM-NEXT: v_mad_f32 v1, v1, v5, v9 +; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, v6, v10 +; GFX10-DENORM-NEXT: v_mad_f32 v3, v3, v7, v11 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul reassoc <4 x float> %x, %y + %b = fadd reassoc <4 x float> %a, %z + ret <4 x float> %b +} + +define <3 x float> @test_3xfloat_add_mul_rhs(<3 x float> %x, <3 x float> %y, <3 x float> %z) { +; GFX9-LABEL: test_3xfloat_add_mul_rhs: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v3 +; GFX9-NEXT: v_mul_f32_e32 v1, v1, v4 +; GFX9-NEXT: v_mul_f32_e32 v2, v2, v5 +; GFX9-NEXT: v_add_f32_e32 v0, v6, v0 +; GFX9-NEXT: v_add_f32_e32 v1, v7, v1 +; GFX9-NEXT: v_add_f32_e32 v2, v8, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_3xfloat_add_mul_rhs: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v3, v6 +; GFX9-CONTRACT-NEXT: v_fma_f32 v1, v1, v4, v7 +; GFX9-CONTRACT-NEXT: v_fma_f32 v2, v2, v5, v8 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_3xfloat_add_mul_rhs: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v3, v6 +; GFX9-DENORM-NEXT: v_mad_f32 v1, v1, v4, v7 +; GFX9-DENORM-NEXT: v_mad_f32 v2, v2, v5, v8 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_3xfloat_add_mul_rhs: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_mul_f32_e32 v0, v0, v3 +; GFX10-NEXT: v_mul_f32_e32 v1, v1, v4 +; GFX10-NEXT: v_mul_f32_e32 v2, v2, v5 +; GFX10-NEXT: v_add_f32_e32 v0, v6, v0 +; GFX10-NEXT: v_add_f32_e32 v1, v7, v1 +; GFX10-NEXT: v_add_f32_e32 v2, v8, v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_3xfloat_add_mul_rhs: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v3, v6 +; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, v4, v7 +; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v5, v8 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_3xfloat_add_mul_rhs: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v3, v6 +; GFX10-DENORM-NEXT: v_mad_f32 v1, v1, v4, v7 +; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, v5, v8 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul reassoc <3 x float> %x, %y + %b = fadd reassoc <3 x float> %z, %a + ret <3 x float> %b +} + +define <4 x half> @test_4xhalf_add_mul(<4 x half> %x, <4 x half> %y, <4 x half> %z) { +; GFX9-LABEL: test_4xhalf_add_mul: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 +; GFX9-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX9-NEXT: v_pk_add_f16 v1, v1, v5 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_4xhalf_add_mul: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_4xhalf_add_mul: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 +; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v1, v5 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_4xhalf_add_mul: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_pk_mul_f16 v7, v0, v2 +; GFX10-NEXT: v_pk_mul_f16 v3, v1, v3 +; GFX10-NEXT: v_pk_add_f16 v0, v7, v4 +; GFX10-NEXT: v_pk_add_f16 v1, v3, v5 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_4xhalf_add_mul: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_4xhalf_add_mul: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-DENORM-NEXT: v_pk_mul_f16 v7, v0, v2 +; GFX10-DENORM-NEXT: v_pk_mul_f16 v3, v1, v3 +; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v7, v4 +; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v3, v5 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul reassoc <4 x half> %x, %y + %b = fadd reassoc <4 x half> %a, %z + ret <4 x half> %b +} + +define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x half> %z) { +; GFX9-LABEL: test_3xhalf_add_mul_rhs: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX9-NEXT: v_mov_b32_e32 v9, 0xffff +; GFX9-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v6 +; GFX9-NEXT: v_lshlrev_b32_e32 v6, 16, v7 +; GFX9-NEXT: v_and_or_b32 v2, v2, v9, v6 +; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v4 +; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v8 +; GFX9-NEXT: v_and_or_b32 v2, v4, v9, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX9-NEXT: s_lshl_b32 s4, s4, 16 +; GFX9-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v4 +; GFX9-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX9-NEXT: v_and_or_b32 v3, v3, v9, s4 +; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 +; GFX9-NEXT: v_pk_add_f16 v0, v2, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-NEXT: v_and_or_b32 v3, v5, v9, s4 +; GFX9-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: v_pk_add_f16 v1, v3, v1 +; GFX9-NEXT: s_lshl_b32 s4, s4, 16 +; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v2 +; GFX9-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_3xhalf_add_mul_rhs: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX9-CONTRACT-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX9-CONTRACT-NEXT: v_mov_b32_e32 v9, 0xffff +; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v6 +; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v7 +; GFX9-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v4 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v2, v2, v9, v6 +; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v8 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v4, v4, v9, v6 +; GFX9-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16 +; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX9-CONTRACT-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v3, v3, v9, s4 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v5, v5, v9, s4 +; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v2 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_3xhalf_add_mul_rhs: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX9-DENORM-NEXT: v_mov_b32_e32 v9, 0xffff +; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v6 +; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v7 +; GFX9-DENORM-NEXT: v_and_or_b32 v2, v2, v9, v6 +; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v8, 16, v4 +; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v8 +; GFX9-DENORM-NEXT: v_and_or_b32 v2, v4, v9, v2 +; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX9-DENORM-NEXT: s_lshl_b32 s4, s4, 16 +; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v4 +; GFX9-DENORM-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX9-DENORM-NEXT: v_and_or_b32 v3, v3, v9, s4 +; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 +; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v2, v0 +; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-DENORM-NEXT: v_and_or_b32 v3, v5, v9, s4 +; GFX9-DENORM-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v3, v1 +; GFX9-DENORM-NEXT: s_lshl_b32 s4, s4, 16 +; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v2 +; GFX9-DENORM-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_3xhalf_add_mul_rhs: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX10-NEXT: v_mov_b32_e32 v11, 0xffff +; GFX10-NEXT: s_lshl_b32 s4, s4, 16 +; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v7 +; GFX10-NEXT: v_and_or_b32 v1, v1, v11, s4 +; GFX10-NEXT: v_and_or_b32 v3, v3, v11, s4 +; GFX10-NEXT: v_and_or_b32 v0, v0, v11, v6 +; GFX10-NEXT: v_and_or_b32 v2, v2, v11, v9 +; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 +; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v4 +; GFX10-NEXT: v_and_or_b32 v1, v1, v11, s4 +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX10-NEXT: v_and_or_b32 v2, v4, v11, v2 +; GFX10-NEXT: v_and_or_b32 v0, v0, v11, v6 +; GFX10-NEXT: v_pk_add_f16 v0, v2, v0 +; GFX10-NEXT: v_and_or_b32 v2, v5, v11, s4 +; GFX10-NEXT: s_lshl_b32 s4, s4, 16 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX10-NEXT: v_pk_add_f16 v1, v2, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v3 +; GFX10-NEXT: v_and_or_b32 v1, v1, v11, s4 +; GFX10-NEXT: v_and_or_b32 v0, v0, v11, v6 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_3xhalf_add_mul_rhs: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v4 +; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v10, 0xffff +; GFX10-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16 +; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v7, 16, v7 +; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v11, 16, v6 +; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v15, 16, v8 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v10, s4 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v2, v2, v10, v7 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v11, v0, v10, v11 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v4, v4, v10, v15 +; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v11, v2, v4 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v2, v3, v10, s4 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v4, v5, v10, s4 +; GFX10-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16 +; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v2, v4 +; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v10, s4 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v0, v0, v10, v3 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_3xhalf_add_mul_rhs: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v11, 0xffff +; GFX10-DENORM-NEXT: s_lshl_b32 s4, s4, 16 +; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v9, 16, v7 +; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v11, s4 +; GFX10-DENORM-NEXT: v_and_or_b32 v3, v3, v11, s4 +; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v11, v6 +; GFX10-DENORM-NEXT: v_and_or_b32 v2, v2, v11, v9 +; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 +; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v4 +; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v11, s4 +; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX10-DENORM-NEXT: v_and_or_b32 v2, v4, v11, v2 +; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v11, v6 +; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v2, v0 +; GFX10-DENORM-NEXT: v_and_or_b32 v2, v5, v11, s4 +; GFX10-DENORM-NEXT: s_lshl_b32 s4, s4, 16 +; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v2, v1 +; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v3 +; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v11, s4 +; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v11, v6 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul reassoc <3 x half> %x, %y + %b = fadd reassoc <3 x half> %z, %a + ret <3 x half> %b +} + + +define <4 x double> @test_4xdouble_add_mul(<4 x double> %x, <4 x double> %y, <4 x double> %z) { +; GFX9-LABEL: test_4xdouble_add_mul: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] +; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] +; GFX9-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] +; GFX9-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15] +; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[16:17] +; GFX9-NEXT: v_add_f64 v[2:3], v[2:3], v[18:19] +; GFX9-NEXT: v_add_f64 v[4:5], v[4:5], v[20:21] +; GFX9-NEXT: v_add_f64 v[6:7], v[6:7], v[22:23] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_4xdouble_add_mul: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] +; GFX9-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] +; GFX9-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] +; GFX9-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_4xdouble_add_mul: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] +; GFX9-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] +; GFX9-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] +; GFX9-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15] +; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], v[16:17] +; GFX9-DENORM-NEXT: v_add_f64 v[2:3], v[2:3], v[18:19] +; GFX9-DENORM-NEXT: v_add_f64 v[4:5], v[4:5], v[20:21] +; GFX9-DENORM-NEXT: v_add_f64 v[6:7], v[6:7], v[22:23] +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_4xdouble_add_mul: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_mov_b32_e32 v26, v0 +; GFX10-NEXT: v_mov_b32_e32 v27, v1 +; GFX10-NEXT: v_mov_b32_e32 v24, v2 +; GFX10-NEXT: v_mov_b32_e32 v25, v3 +; GFX10-NEXT: v_mov_b32_e32 v30, v4 +; GFX10-NEXT: v_mov_b32_e32 v31, v5 +; GFX10-NEXT: v_mov_b32_e32 v28, v6 +; GFX10-NEXT: v_mov_b32_e32 v29, v7 +; GFX10-NEXT: v_mul_f64 v[26:27], v[26:27], v[8:9] +; GFX10-NEXT: v_mul_f64 v[8:9], v[24:25], v[10:11] +; GFX10-NEXT: v_mul_f64 v[10:11], v[30:31], v[12:13] +; GFX10-NEXT: v_mul_f64 v[12:13], v[28:29], v[14:15] +; GFX10-NEXT: v_add_f64 v[0:1], v[26:27], v[16:17] +; GFX10-NEXT: v_add_f64 v[2:3], v[8:9], v[18:19] +; GFX10-NEXT: v_add_f64 v[4:5], v[10:11], v[20:21] +; GFX10-NEXT: v_add_f64 v[6:7], v[12:13], v[22:23] +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_4xdouble_add_mul: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v26, v0 +; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v27, v1 +; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v24, v2 +; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v25, v3 +; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v30, v4 +; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v31, v5 +; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v28, v6 +; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v29, v7 +; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[26:27], v[8:9], v[16:17] +; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[24:25], v[10:11], v[18:19] +; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[30:31], v[12:13], v[20:21] +; GFX10-CONTRACT-NEXT: v_fma_f64 v[6:7], v[28:29], v[14:15], v[22:23] +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_4xdouble_add_mul: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v26, v0 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v27, v1 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v24, v2 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v25, v3 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v30, v4 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v31, v5 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v28, v6 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v29, v7 +; GFX10-DENORM-NEXT: v_mul_f64 v[26:27], v[26:27], v[8:9] +; GFX10-DENORM-NEXT: v_mul_f64 v[8:9], v[24:25], v[10:11] +; GFX10-DENORM-NEXT: v_mul_f64 v[10:11], v[30:31], v[12:13] +; GFX10-DENORM-NEXT: v_mul_f64 v[12:13], v[28:29], v[14:15] +; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[26:27], v[16:17] +; GFX10-DENORM-NEXT: v_add_f64 v[2:3], v[8:9], v[18:19] +; GFX10-DENORM-NEXT: v_add_f64 v[4:5], v[10:11], v[20:21] +; GFX10-DENORM-NEXT: v_add_f64 v[6:7], v[12:13], v[22:23] +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul reassoc <4 x double> %x, %y + %b = fadd reassoc <4 x double> %a, %z + ret <4 x double> %b +} + +define <3 x double> @test_3xdouble_add_mul_rhs(<3 x double> %x, <3 x double> %y, <3 x double> %z) { +; GFX9-LABEL: test_3xdouble_add_mul_rhs: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[6:7] +; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX9-NEXT: v_mul_f64 v[4:5], v[4:5], v[10:11] +; GFX9-NEXT: v_add_f64 v[0:1], v[12:13], v[0:1] +; GFX9-NEXT: v_add_f64 v[2:3], v[14:15], v[2:3] +; GFX9-NEXT: v_add_f64 v[4:5], v[16:17], v[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_3xdouble_add_mul_rhs: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] +; GFX9-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] +; GFX9-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_3xdouble_add_mul_rhs: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[6:7] +; GFX9-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX9-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[10:11] +; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[12:13], v[0:1] +; GFX9-DENORM-NEXT: v_add_f64 v[2:3], v[14:15], v[2:3] +; GFX9-DENORM-NEXT: v_add_f64 v[4:5], v[16:17], v[4:5] +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_3xdouble_add_mul_rhs: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[6:7] +; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[10:11] +; GFX10-NEXT: v_mov_b32_e32 v18, v12 +; GFX10-NEXT: v_mov_b32_e32 v19, v13 +; GFX10-NEXT: v_mov_b32_e32 v12, v14 +; GFX10-NEXT: v_mov_b32_e32 v13, v15 +; GFX10-NEXT: v_mov_b32_e32 v14, v16 +; GFX10-NEXT: v_mov_b32_e32 v15, v17 +; GFX10-NEXT: v_add_f64 v[0:1], v[18:19], v[0:1] +; GFX10-NEXT: v_add_f64 v[2:3], v[12:13], v[2:3] +; GFX10-NEXT: v_add_f64 v[4:5], v[14:15], v[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_3xdouble_add_mul_rhs: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] +; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] +; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_3xdouble_add_mul_rhs: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[6:7] +; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[10:11] +; GFX10-DENORM-NEXT: v_mov_b32_e32 v18, v12 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v19, v13 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v12, v14 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v13, v15 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v14, v16 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v15, v17 +; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[18:19], v[0:1] +; GFX10-DENORM-NEXT: v_add_f64 v[2:3], v[12:13], v[2:3] +; GFX10-DENORM-NEXT: v_add_f64 v[4:5], v[14:15], v[4:5] +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul reassoc <3 x double> %x, %y + %b = fadd reassoc <3 x double> %z, %a + ret <3 x double> %b +}