Index: llvm/lib/Target/AMDGPU/AMDGPUGISel.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -186,6 +186,11 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; + def : GINodeEquiv; def : GINodeEquiv; Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2937,6 +2937,8 @@ case TargetOpcode::G_ATOMICRMW_FADD: case AMDGPU::G_AMDGPU_ATOMIC_INC: case AMDGPU::G_AMDGPU_ATOMIC_DEC: + case AMDGPU::G_AMDGPU_ATOMIC_FMIN: + case AMDGPU::G_AMDGPU_ATOMIC_FMAX: return selectG_LOAD_ATOMICRMW(I); case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: return selectG_AMDGPU_ATOMIC_CMPXCHG(I); Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -131,6 +131,9 @@ bool legalizeRsqClampIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; + bool legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper, + MachineInstr &MI, Intrinsic::ID IID) const; + bool getImplicitArgPtr(Register DstReg, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -3209,6 +3209,37 @@ return true; } +static unsigned getDSFPAtomicOpcode(Intrinsic::ID IID) { + switch (IID) { + case Intrinsic::amdgcn_ds_fadd: + return AMDGPU::G_ATOMICRMW_FADD; + case Intrinsic::amdgcn_ds_fmin: + return AMDGPU::G_AMDGPU_ATOMIC_FMIN; + case Intrinsic::amdgcn_ds_fmax: + return AMDGPU::G_AMDGPU_ATOMIC_FMAX; + default: + llvm_unreachable("not a DS FP intrinsic"); + } +} + +bool AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper, + MachineInstr &MI, + Intrinsic::ID IID) const { + GISelChangeObserver &Observer = Helper.Observer; + Observer.changingInstr(MI); + + MI.setDesc(ST.getInstrInfo()->get(getDSFPAtomicOpcode(IID))); + + // The remaining operands were used to set fields in the MemOperand on + // construction. + for (int I = 6; I > 3; --I) + MI.RemoveOperand(I); + + MI.RemoveOperand(1); // Remove the intrinsic ID. + Observer.changedInstr(MI); + return true; +} + bool AMDGPULegalizerInfo::getImplicitArgPtr(Register DstReg, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { @@ -4444,6 +4475,10 @@ return legalizeDebugTrapIntrinsic(MI, MRI, B); case Intrinsic::amdgcn_rsq_clamp: return legalizeRsqClampIntrinsic(MI, MRI, B); + case Intrinsic::amdgcn_ds_fadd: + case Intrinsic::amdgcn_ds_fmin: + case Intrinsic::amdgcn_ds_fmax: + return legalizeDSAtomicFPIntrinsic(Helper, MI, IntrID); default: { if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrID)) Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4203,9 +4203,6 @@ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; } - case Intrinsic::amdgcn_ds_fadd: - case Intrinsic::amdgcn_ds_fmin: - case Intrinsic::amdgcn_ds_fmax: case Intrinsic::amdgcn_global_atomic_csub: return getDefaultMappingAllVGPR(MI); case Intrinsic::amdgcn_ds_ordered_add: @@ -4391,7 +4388,9 @@ case AMDGPU::G_ATOMICRMW_FADD: case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: case AMDGPU::G_AMDGPU_ATOMIC_INC: - case AMDGPU::G_AMDGPU_ATOMIC_DEC: { + case AMDGPU::G_AMDGPU_ATOMIC_DEC: + case AMDGPU::G_AMDGPU_ATOMIC_FMIN: + case AMDGPU::G_AMDGPU_ATOMIC_FMAX: { OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg()); OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); Index: llvm/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstructions.td +++ llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2400,6 +2400,8 @@ let Namespace = "AMDGPU" in { def G_AMDGPU_ATOMIC_INC : G_ATOMICRMW_OP; def G_AMDGPU_ATOMIC_DEC : G_ATOMICRMW_OP; +def G_AMDGPU_ATOMIC_FMIN : G_ATOMICRMW_OP; +def G_AMDGPU_ATOMIC_FMAX : G_ATOMICRMW_OP; } class BufferAtomicGenericInstruction : AMDGPUGenericInstruction { Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll @@ -0,0 +1,190 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s + +define amdgpu_ps float @ds_fadd_f32_ss(float addrspace(3)* inreg %ptr, float inreg %val) { +; GFX8-LABEL: ds_fadd_f32_ss: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: ds_fadd_f32_ss: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ; return to shader part epilog + %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + ret float %ret +} + +define amdgpu_ps float @ds_fadd_f32_ss_offset(float addrspace(3)* inreg %ptr, float inreg %val) { +; GFX8-LABEL: ds_fadd_f32_ss_offset: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_add_u32 s0, s2, 0x200 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: ds_fadd_f32_ss_offset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_add_u32 s0, s2, 0x200 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ; return to shader part epilog + %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + ret float %ret +} + +define amdgpu_ps void @ds_fadd_f32_ss_nortn(float addrspace(3)* inreg %ptr, float inreg %val) { +; GFX8-LABEL: ds_fadd_f32_ss_nortn: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_endpgm +; +; GFX9-LABEL: ds_fadd_f32_ss_nortn: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_endpgm + %unused = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + ret void +} + +define amdgpu_ps void @ds_fadd_f32_ss_offset_nortn(float addrspace(3)* inreg %ptr, float inreg %val) { +; GFX8-LABEL: ds_fadd_f32_ss_offset_nortn: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_add_u32 s0, s2, 0x200 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_endpgm +; +; GFX9-LABEL: ds_fadd_f32_ss_offset_nortn: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_add_u32 s0, s2, 0x200 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_endpgm + %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 + %unused = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + ret void +} + +define float @ds_fadd_f32_vv(float addrspace(3)* %ptr, float %val) { +; GFX8-LABEL: ds_fadd_f32_vv: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: ds_fadd_f32_vv: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] + %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + ret float %ret +} + +define float @ds_fadd_f32_vv_offset(float addrspace(3)* %ptr, float %val) { +; GFX8-LABEL: ds_fadd_f32_vv_offset: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_add_rtn_f32 v0, v0, v1 offset:512 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: ds_fadd_f32_vv_offset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_add_rtn_f32 v0, v0, v1 offset:512 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + ret float %ret +} + +define void @ds_fadd_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { +; GFX8-LABEL: ds_fadd_f32_vv_nortn: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: ds_fadd_f32_vv_nortn: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] + %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + ret void +} + +define void @ds_fadd_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { +; GFX8-LABEL: ds_fadd_f32_vv_offset_nortn: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_add_rtn_f32 v0, v0, v1 offset:512 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: ds_fadd_f32_vv_offset_nortn: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_add_rtn_f32 v0, v0, v1 offset:512 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + ret void +} + +define float @ds_fadd_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { +; GFX8-LABEL: ds_fadd_f32_vv_volatile: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: ds_fadd_f32_vv_volatile: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] + %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true) + ret float %ret +} + +declare float @llvm.amdgcn.ds.fadd(float addrspace(3)* nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0 + +attributes #0 = { argmemonly nounwind willreturn } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll @@ -0,0 +1,385 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s + +; Make sure the memory operand information is preserved. +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8-MIR %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9-MIR %s + + +define amdgpu_ps float @ds_fmax_f32_ss(float addrspace(3)* inreg %ptr, float inreg %val) { +; GFX8-LABEL: ds_fmax_f32_ss: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: ds_fmax_f32_ss: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ; return to shader part epilog + ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss + ; GFX8-MIR: bb.1 (%ir-block.0): + ; GFX8-MIR: liveins: $sgpr2, $sgpr3 + ; GFX8-MIR: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX8-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GFX8-MIR: $m0 = S_MOV_B32 -1 + ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store 4 on %ir.ptr, addrspace 3) + ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] + ; GFX8-MIR: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss + ; GFX9-MIR: bb.1 (%ir-block.0): + ; GFX9-MIR: liveins: $sgpr2, $sgpr3 + ; GFX9-MIR: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX9-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX9-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX9-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store 4 on %ir.ptr, addrspace 3) + ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] + ; GFX9-MIR: SI_RETURN_TO_EPILOG implicit $vgpr0 + %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + ret float %ret +} + +define amdgpu_ps float @ds_fmax_f32_ss_offset(float addrspace(3)* inreg %ptr, float inreg %val) { +; GFX8-LABEL: ds_fmax_f32_ss_offset: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_add_u32 s0, s2, 0x200 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: ds_fmax_f32_ss_offset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_add_u32 s0, s2, 0x200 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ; return to shader part epilog + ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_offset + ; GFX8-MIR: bb.1 (%ir-block.0): + ; GFX8-MIR: liveins: $sgpr2, $sgpr3 + ; GFX8-MIR: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-MIR: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 512 + ; GFX8-MIR: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_U32_]] + ; GFX8-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GFX8-MIR: $m0 = S_MOV_B32 -1 + ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store 4 on %ir.gep, addrspace 3) + ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] + ; GFX8-MIR: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset + ; GFX9-MIR: bb.1 (%ir-block.0): + ; GFX9-MIR: liveins: $sgpr2, $sgpr3 + ; GFX9-MIR: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX9-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX9-MIR: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 512 + ; GFX9-MIR: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_U32_]] + ; GFX9-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store 4 on %ir.gep, addrspace 3) + ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] + ; GFX9-MIR: SI_RETURN_TO_EPILOG implicit $vgpr0 + %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + ret float %ret +} + +define amdgpu_ps void @ds_fmax_f32_ss_nortn(float addrspace(3)* inreg %ptr, float inreg %val) { +; GFX8-LABEL: ds_fmax_f32_ss_nortn: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_endpgm +; +; GFX9-LABEL: ds_fmax_f32_ss_nortn: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_endpgm + ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_nortn + ; GFX8-MIR: bb.1 (%ir-block.0): + ; GFX8-MIR: liveins: $sgpr2, $sgpr3 + ; GFX8-MIR: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX8-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GFX8-MIR: $m0 = S_MOV_B32 -1 + ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store 4 on %ir.ptr, addrspace 3) + ; GFX8-MIR: S_ENDPGM 0 + ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_nortn + ; GFX9-MIR: bb.1 (%ir-block.0): + ; GFX9-MIR: liveins: $sgpr2, $sgpr3 + ; GFX9-MIR: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX9-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX9-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX9-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store 4 on %ir.ptr, addrspace 3) + ; GFX9-MIR: S_ENDPGM 0 + %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + ret void +} + +define amdgpu_ps void @ds_fmax_f32_ss_offset_nortn(float addrspace(3)* inreg %ptr, float inreg %val) { +; GFX8-LABEL: ds_fmax_f32_ss_offset_nortn: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_add_u32 s0, s2, 0x200 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_endpgm +; +; GFX9-LABEL: ds_fmax_f32_ss_offset_nortn: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_add_u32 s0, s2, 0x200 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_endpgm + ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn + ; GFX8-MIR: bb.1 (%ir-block.0): + ; GFX8-MIR: liveins: $sgpr2, $sgpr3 + ; GFX8-MIR: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-MIR: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 512 + ; GFX8-MIR: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_U32_]] + ; GFX8-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GFX8-MIR: $m0 = S_MOV_B32 -1 + ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store 4 on %ir.gep, addrspace 3) + ; GFX8-MIR: S_ENDPGM 0 + ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn + ; GFX9-MIR: bb.1 (%ir-block.0): + ; GFX9-MIR: liveins: $sgpr2, $sgpr3 + ; GFX9-MIR: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX9-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX9-MIR: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 512 + ; GFX9-MIR: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_U32_]] + ; GFX9-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store 4 on %ir.gep, addrspace 3) + ; GFX9-MIR: S_ENDPGM 0 + %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 + %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + ret void +} + +define float @ds_fmax_f32_vv(float addrspace(3)* %ptr, float %val) { +; GFX8-LABEL: ds_fmax_f32_vv: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: ds_fmax_f32_vv: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] + ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv + ; GFX8-MIR: bb.1 (%ir-block.0): + ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX8-MIR: $m0 = S_MOV_B32 -1 + ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store 4 on %ir.ptr, addrspace 3) + ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] + ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv + ; GFX9-MIR: bb.1 (%ir-block.0): + ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store 4 on %ir.ptr, addrspace 3) + ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] + ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + ret float %ret +} + +define float @ds_fmax_f32_vv_offset(float addrspace(3)* %ptr, float %val) { +; GFX8-LABEL: ds_fmax_f32_vv_offset: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_max_rtn_f32 v0, v0, v1 offset:512 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: ds_fmax_f32_vv_offset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1 offset:512 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] + ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset + ; GFX8-MIR: bb.1 (%ir-block.0): + ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX8-MIR: $m0 = S_MOV_B32 -1 + ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store 4 on %ir.gep, addrspace 3) + ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] + ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset + ; GFX9-MIR: bb.1 (%ir-block.0): + ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store 4 on %ir.gep, addrspace 3) + ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] + ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + ret float %ret +} + +define void @ds_fmax_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { +; GFX8-LABEL: ds_fmax_f32_vv_nortn: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: ds_fmax_f32_vv_nortn: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] + ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_nortn + ; GFX8-MIR: bb.1 (%ir-block.0): + ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX8-MIR: $m0 = S_MOV_B32 -1 + ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store 4 on %ir.ptr, addrspace 3) + ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX8-MIR: S_SETPC_B64_return [[COPY3]] + ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_nortn + ; GFX9-MIR: bb.1 (%ir-block.0): + ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store 4 on %ir.ptr, addrspace 3) + ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX9-MIR: S_SETPC_B64_return [[COPY3]] + %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + ret void +} + +define void @ds_fmax_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { +; GFX8-LABEL: ds_fmax_f32_vv_offset_nortn: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_max_rtn_f32 v0, v0, v1 offset:512 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: ds_fmax_f32_vv_offset_nortn: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1 offset:512 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] + ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn + ; GFX8-MIR: bb.1 (%ir-block.0): + ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX8-MIR: $m0 = S_MOV_B32 -1 + ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store 4 on %ir.gep, addrspace 3) + ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX8-MIR: S_SETPC_B64_return [[COPY3]] + ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn + ; GFX9-MIR: bb.1 (%ir-block.0): + ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store 4 on %ir.gep, addrspace 3) + ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX9-MIR: S_SETPC_B64_return [[COPY3]] + %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + ret void +} + +define float @ds_fmax_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { +; GFX8-LABEL: ds_fmax_f32_vv_volatile: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: ds_fmax_f32_vv_volatile: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] + ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_volatile + ; GFX8-MIR: bb.1 (%ir-block.0): + ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX8-MIR: $m0 = S_MOV_B32 -1 + ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (volatile load store 4 on %ir.ptr, addrspace 3) + ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] + ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_volatile + ; GFX9-MIR: bb.1 (%ir-block.0): + ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store 4 on %ir.ptr, addrspace 3) + ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] + ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true) + ret float %ret +} + +declare float @llvm.amdgcn.ds.fmax(float addrspace(3)* nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0 + +attributes #0 = { argmemonly nounwind willreturn } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll @@ -0,0 +1,190 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s + +define amdgpu_ps float @ds_fmin_f32_ss(float addrspace(3)* inreg %ptr, float inreg %val) { +; GFX8-LABEL: ds_fmin_f32_ss: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: ds_fmin_f32_ss: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ; return to shader part epilog + %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + ret float %ret +} + +define amdgpu_ps float @ds_fmin_f32_ss_offset(float addrspace(3)* inreg %ptr, float inreg %val) { +; GFX8-LABEL: ds_fmin_f32_ss_offset: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_add_u32 s0, s2, 0x200 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: ds_fmin_f32_ss_offset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_add_u32 s0, s2, 0x200 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ; return to shader part epilog + %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + ret float %ret +} + +define amdgpu_ps void @ds_fmin_f32_ss_nortn(float addrspace(3)* inreg %ptr, float inreg %val) { +; GFX8-LABEL: ds_fmin_f32_ss_nortn: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_endpgm +; +; GFX9-LABEL: ds_fmin_f32_ss_nortn: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_endpgm + %unused = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + ret void +} + +define amdgpu_ps void @ds_fmin_f32_ss_offset_nortn(float addrspace(3)* inreg %ptr, float inreg %val) { +; GFX8-LABEL: ds_fmin_f32_ss_offset_nortn: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_add_u32 s0, s2, 0x200 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_endpgm +; +; GFX9-LABEL: ds_fmin_f32_ss_offset_nortn: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_add_u32 s0, s2, 0x200 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_endpgm + %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 + %unused = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + ret void +} + +define float @ds_fmin_f32_vv(float addrspace(3)* %ptr, float %val) { +; GFX8-LABEL: ds_fmin_f32_vv: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: ds_fmin_f32_vv: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] + %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + ret float %ret +} + +define float @ds_fmin_f32_vv_offset(float addrspace(3)* %ptr, float %val) { +; GFX8-LABEL: ds_fmin_f32_vv_offset: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:512 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: ds_fmin_f32_vv_offset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:512 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + ret float %ret +} + +define void @ds_fmin_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { +; GFX8-LABEL: ds_fmin_f32_vv_nortn: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: ds_fmin_f32_vv_nortn: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] + %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + ret void +} + +define void @ds_fmin_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { +; GFX8-LABEL: ds_fmin_f32_vv_offset_nortn: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:512 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: ds_fmin_f32_vv_offset_nortn: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:512 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + ret void +} + +define float @ds_fmin_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { +; GFX8-LABEL: ds_fmin_f32_vv_volatile: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: ds_fmin_f32_vv_volatile: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] + %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true) + ret float %ret +} + +declare float @llvm.amdgcn.ds.fmin(float addrspace(3)* nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0 + +attributes #0 = { argmemonly nounwind willreturn } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.fmax.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.fmax.mir +++ /dev/null @@ -1,83 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s - ---- -name: ds_fmax_ss -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; CHECK-LABEL: name: ds_fmax_ss - ; CHECK: liveins: $sgpr0, $sgpr1 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0 - %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), %0, %1, 0, 0, 0 - -... - ---- -name: ds_fmax_sv -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $sgpr0, $vgpr0 - - ; CHECK-LABEL: name: ds_fmax_sv - ; CHECK: liveins: $sgpr0, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0 - %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), %0, %1, 0, 0, 0 - -... - ---- -name: ds_fmax_vs -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $vgpr0, $sgpr0 - - ; CHECK-LABEL: name: ds_fmax_vs - ; CHECK: liveins: $vgpr0, $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0 - %0:_(p3) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), %0, %1, 0, 0, 0 - -... - ---- -name: ds_fmax_vv -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; CHECK-LABEL: name: ds_fmax_vv - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0 - %0:_(p3) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), %0, %1, 0, 0, 0 - -... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.fmin.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.fmin.mir +++ /dev/null @@ -1,83 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s - ---- -name: ds_fmin_ss -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; CHECK-LABEL: name: ds_fmin_ss - ; CHECK: liveins: $sgpr0, $sgpr1 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0 - %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), %0, %1, 0, 0, 0 - -... - ---- -name: ds_fmin_sv -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $sgpr0, $vgpr0 - - ; CHECK-LABEL: name: ds_fmin_sv - ; CHECK: liveins: $sgpr0, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0 - %0:_(p3) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), %0, %1, 0, 0, 0 - -... - ---- -name: ds_fmin_vs -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $vgpr0, $sgpr0 - - ; CHECK-LABEL: name: ds_fmin_vs - ; CHECK: liveins: $vgpr0, $sgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0 - %0:_(p3) = COPY $vgpr0 - %1:_(s32) = COPY $sgpr0 - %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), %0, %1, 0, 0, 0 - -... - ---- -name: ds_fmin_vv -legalized: true -tracksRegLiveness: true -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; CHECK-LABEL: name: ds_fmin_vv - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0 - %0:_(p3) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), %0, %1, 0, 0, 0 - -...