Diff 300311

llvm/lib/Target/AMDGPU/AMDGPUCombine.td

Show All 37 Lines	def cvt_f32_ubyteN : GICombineRule<
(apply [{ applyCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }])>;		(apply [{ applyCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }])>;

def minmax_to_med3_matchdata : GIDefMatchData<"MinMaxToMed3MatchInfo">;		def minmax_to_med3_matchdata : GIDefMatchData<"MinMaxToMed3MatchInfo">;
def minmax_to_med3 : GICombineRule<		def minmax_to_med3 : GICombineRule<
(defs root:$min_or_max, minmax_to_med3_matchdata:$matchinfo),		(defs root:$min_or_max, minmax_to_med3_matchdata:$matchinfo),
(match (wip_match_opcode G_SMAX,		(match (wip_match_opcode G_SMAX,
G_SMIN,		G_SMIN,
G_UMAX,		G_UMAX,
G_UMIN):$min_or_max,		G_UMIN,
		G_FMINNUM,
		G_FMAXNUM):$min_or_max,
[{ return matchMinMaxToMed3(*${min_or_max}, MRI, ${matchinfo}); }]),		[{ return matchMinMaxToMed3(*${min_or_max}, MRI, ${matchinfo}); }]),
(apply [{ applyMinMaxToMed3(*${min_or_max}, ${matchinfo}); }])>;		(apply [{ applyMinMaxToMed3(*${min_or_max}, ${matchinfo}); }])>;

// Combines which should only apply on SI/VI		// Combines which should only apply on SI/VI
def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;		def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;


def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<		def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
Show All 15 Lines

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

	Show First 20 Lines • Show All 206 Lines • ▼ Show 20 Lines
	def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_XOR, SIbuffer_atomic_xor>;			def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_XOR, SIbuffer_atomic_xor>;
	def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_INC, SIbuffer_atomic_inc>;			def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_INC, SIbuffer_atomic_inc>;
	def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_DEC, SIbuffer_atomic_dec>;			def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_DEC, SIbuffer_atomic_dec>;
	def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_FADD, SIbuffer_atomic_fadd>;			def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_FADD, SIbuffer_atomic_fadd>;
	def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_CMPSWAP, SIbuffer_atomic_cmpswap>;			def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_CMPSWAP, SIbuffer_atomic_cmpswap>;
	def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD, SIsbuffer_load>;			def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD, SIsbuffer_load>;
	def : GINodeEquiv<G_AMDGPU_SMED3, AMDGPUsmed3>;			def : GINodeEquiv<G_AMDGPU_SMED3, AMDGPUsmed3>;
	def : GINodeEquiv<G_AMDGPU_UMED3, AMDGPUumed3>;			def : GINodeEquiv<G_AMDGPU_UMED3, AMDGPUumed3>;
				def : GINodeEquiv<G_AMDGPU_FMED3, AMDGPUfmed3_impl>;

	class GISelSop2Pat <			class GISelSop2Pat <
	SDPatternOperator node,			SDPatternOperator node,
	Instruction inst,			Instruction inst,
	ValueType dst_vt,			ValueType dst_vt,
	ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt> : GCNPat <			ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt> : GCNPat <

	(dst_vt (node (src0_vt SReg_32:$src0), (src1_vt SReg_32:$src1))),			(dst_vt (node (src0_vt SReg_32:$src0), (src1_vt SReg_32:$src1))),
	▲ Show 20 Lines • Show All 111 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp

Show All 17 Lines
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"		#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"		#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"		#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"		#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"		#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"		#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"		#include "llvm/Support/Debug.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"		#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
		#include "SIMachineFunctionInfo.h"

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"		#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;		using namespace llvm;
using namespace MIPatternMatch;		using namespace MIPatternMatch;

namespace {		namespace {

struct MinMaxMedOpc {		struct MinMaxMedOpc {
unsigned Min, Max, Med;		unsigned Min, Max, Med;
};		};

using MIPtr = const MachineInstr *;		using MIPtr = const MachineInstr *;
/// Returns true and stores \p MI in \p Cst if it represents constant.		/// Returns true and stores \p MI in \p Cst if it represents constant.
bool isConst(MIPtr MI, MachineRegisterInfo &MRI, MIPtr &Cst) {		bool isConst(MIPtr MI, MachineRegisterInfo &MRI, MIPtr &Cst) {
unsigned Opc = MI->getOpcode();		unsigned Opc = MI->getOpcode();
if (Opc == AMDGPU::G_CONSTANT) {		if (Opc == AMDGPU::G_CONSTANT \|\| Opc == AMDGPU::G_FCONSTANT) {
Cst = MI;		Cst = MI;
return true;		return true;
}		}
// TODO: Check for fp vector splat constants(consider ignoring undef in splat		// TODO: Check for fp vector splat constants(consider ignoring undef in splat
// check). Used for clamp in f16 packed instructions.		// check). Used for clamp in f16 packed instructions.
return false;		return false;
}		}

▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines	MinMaxMedOpc getMinMaxPair(unsigned Opc) {
default:		default:
return {0, 0, 0};		return {0, 0, 0};
case AMDGPU::G_SMAX:		case AMDGPU::G_SMAX:
case AMDGPU::G_SMIN:		case AMDGPU::G_SMIN:
return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};		return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
case AMDGPU::G_UMAX:		case AMDGPU::G_UMAX:
case AMDGPU::G_UMIN:		case AMDGPU::G_UMIN:
return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};		return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
		case AMDGPU::G_FMINNUM:
		case AMDGPU::G_FMAXNUM:
		return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
}		}
}		}

bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,		bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
MIPtr &Val, MIPtr &InnerInst, MIPtr &K0, MIPtr &K1) {		MIPtr &Val, MIPtr &InnerInst, MIPtr &K0, MIPtr &K1) {
// 4 operand commutes of: min(max(Val, K0), K1)		// 4 operand commutes of: min(max(Val, K0), K1)
if (MI.getOpcode() == MMMOpc.Min)		if (MI.getOpcode() == MMMOpc.Min)
// Find K1 from outer instruction: min(max(...), K1) or min(K1, max(...))		// Find K1 from outer instruction: min(max(...), K1) or min(K1, max(...))
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines	if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 \|\|
if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && KO_Imm.sge(K1_Imm))		if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && KO_Imm.sge(K1_Imm))
return false;		return false;
if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && KO_Imm.uge(K1_Imm))		if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && KO_Imm.uge(K1_Imm))
return false;		return false;
MatchInfo = {OpcodeTriple.Med, ValDef, K0Def, K1Def};		MatchInfo = {OpcodeTriple.Med, ValDef, K0Def, K1Def};
return true;		return true;
}		}

		if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_FMED3) {
		const APFloat &KO_FPImm = K0->getOperand(1).getFPImm()->getValue();
		const APFloat &K1_FPImm = K1->getOperand(1).getFPImm()->getValue();
		if (KO_FPImm >= K1_FPImm)
		return false;

		const MachineFunction *MF = MI.getMF();
		const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
		foad (inline comment, not done): This will probably warn that Info is unused in a Release build.

		// TODO: Add G_FMINNUM_IEEE (requires some additional checks for possible
		// SNaN input).
		arsenm (inline comment, not done): Might as well handle this now.
		Petar.Avramovic (author, inline comment, done): I am not really sure I know the correct way to do it. Also, this combine depends heavily on the legalizer, primarily on completing the remaining SNaN checks in isKnownNeverSNaN.
		if (OpcodeTriple.Min == AMDGPU::G_FMINNUM) {
		assert(!Info->getMode().IEEE && "G_FMINNUM was supposed to be lowered to "
		"G_FMINNUM_IEEE with IEEE=true");

		const SIInstrInfo *TII = MF->getSubtarget<GCNSubtarget>().getInstrInfo();
		APInt KObits = KO_FPImm.bitcastToAPInt();
		APInt K1bits = K1_FPImm.bitcastToAPInt();
		arsenm (inline comment, not done): You can directly pass the APFloat to the overload of isInlineConstant.
		if ((!MRI.hasOneNonDBGUse(K0Def) \|\| TII->isInlineConstant(KObits)) &&
		(!MRI.hasOneNonDBGUse(K1Def) \|\| TII->isInlineConstant(K1bits))) {
		MatchInfo = {OpcodeTriple.Med, ValDef, K0Def, K1Def};
		return true;
		}
		}
		}

return false;		return false;
}		}

static void applyMinMaxToMed3(MachineInstr &MI,		static void applyMinMaxToMed3(MachineInstr &MI,
MinMaxToMed3MatchInfo &MatchInfo) {		MinMaxToMed3MatchInfo &MatchInfo) {
MachineIRBuilder B(MI);		MachineIRBuilder B(MI);
B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},		B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
{MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2});		{MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2});
▲ Show 20 Lines • Show All 331 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Show First 20 Lines • Show All 3,636 Lines • ▼ Show 20 Lines	AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_AMDGPU_FMAX_LEGACY:		case AMDGPU::G_AMDGPU_FMAX_LEGACY:
case AMDGPU::G_AMDGPU_RCP_IFLAG:		case AMDGPU::G_AMDGPU_RCP_IFLAG:
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:		case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:		case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:		case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:		case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
case AMDGPU::G_AMDGPU_SMED3:		case AMDGPU::G_AMDGPU_SMED3:
case AMDGPU::G_AMDGPU_UMED3:		case AMDGPU::G_AMDGPU_UMED3:
		case AMDGPU::G_AMDGPU_FMED3:
return getDefaultMappingVOP(MI);		return getDefaultMappingVOP(MI);
case AMDGPU::G_UMULH:		case AMDGPU::G_UMULH:
case AMDGPU::G_SMULH: {		case AMDGPU::G_SMULH: {
if (Subtarget.hasScalarMulHiInsts() && isSALUMapping(MI))		if (Subtarget.hasScalarMulHiInsts() && isSALUMapping(MI))
return getDefaultMappingSOP(MI);		return getDefaultMappingSOP(MI);
return getDefaultMappingVOP(MI);		return getDefaultMappingVOP(MI);
}		}
case AMDGPU::G_IMPLICIT_DEF: {		case AMDGPU::G_IMPLICIT_DEF: {
▲ Show 20 Lines • Show All 836 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIInstructions.td

Show First 20 Lines • Show All 2,687 Lines • ▼ Show 20 Lines	def G_AMDGPU_SMED3 : AMDGPUGenericInstruction {
let hasSideEffects = 0;		let hasSideEffects = 0;
}		}

def G_AMDGPU_UMED3 : AMDGPUGenericInstruction {		def G_AMDGPU_UMED3 : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);		let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);		let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
let hasSideEffects = 0;		let hasSideEffects = 0;
}		}

		def G_AMDGPU_FMED3 : AMDGPUGenericInstruction {
		let OutOperandList = (outs type0:$dst);
		let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
		let hasSideEffects = 0;
		}

llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll

Show First 20 Lines • Show All 637 Lines • ▼ Show 20 Lines	; GFX9-NEXT: s_endpgm
store volatile float %tmp0, float addrspace(1)* undef		store volatile float %tmp0, float addrspace(1)* undef
%tmp1 = call float @llvm.maxnum.f32(float %a, float %b)		%tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
%tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)		%tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
%med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)		%med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
store float %med3, float addrspace(1)* %outgep		store float %med3, float addrspace(1)* %outgep
ret void		ret void
}		}

		define amdgpu_ps float @test_min_max_ValK0_K1_u32(float %a) {
		; GCN-LABEL: test_min_max_ValK0_K1_u32:
		; GCN: ; %bb.0:
		; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
		; GCN-NEXT: ; return to shader part epilog
		%maxnum = call float @llvm.maxnum.f32(float %a, float 2.0)
		%fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0)
		ret float %fmed
		}

		define amdgpu_ps float @test_non_inline_const(float %a) {
		; GCN-LABEL: test_non_inline_const:
		; GCN: ; %bb.0:
		; GCN-NEXT: v_max_f32_e32 v0, 2.0, v0
		; GCN-NEXT: v_min_f32_e32 v0, 0x41000000, v0
		; GCN-NEXT: ; return to shader part epilog
		%maxnum = call float @llvm.maxnum.f32(float %a, float 2.0)
		%fmed = call float @llvm.minnum.f32(float %maxnum, float 8.0)
		ret float %fmed
		}

		define amdgpu_ps float @min_max_ValK0_K1_float(float %a) {
		; GCN-LABEL: min_max_ValK0_K1_float:
		; GCN: ; %bb.0:
		; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
		; GCN-NEXT: ; return to shader part epilog
		%maxnum = call float @llvm.maxnum.f32(float 2.0, float %a)
		%fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0)
		ret float %fmed
		}

		define amdgpu_ps float @test_min_K1max_ValK0__u32(float %a) {
		; GCN-LABEL: test_min_K1max_ValK0__u32:
		; GCN: ; %bb.0:
		; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
		; GCN-NEXT: ; return to shader part epilog
		%maxnum = call float @llvm.maxnum.f32(float %a, float 2.0)
		%fmed = call float @llvm.minnum.f32(float 4.0, float %maxnum)
		ret float %fmed
		}

		define amdgpu_ps float @test_min_K1max_K0Val__u32(float %a) {
		; GCN-LABEL: test_min_K1max_K0Val__u32:
		; GCN: ; %bb.0:
		; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
		; GCN-NEXT: ; return to shader part epilog
		%maxnum = call float @llvm.maxnum.f32(float 2.0, float %a)
		%fmed = call float @llvm.minnum.f32(float 4.0, float %maxnum)
		ret float %fmed
		}

		define amdgpu_ps float @test_max_min_ValK1_K0_u32(float %a) {
		; GCN-LABEL: test_max_min_ValK1_K0_u32:
		; GCN: ; %bb.0:
		; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
		; GCN-NEXT: ; return to shader part epilog
		%minnum = call float @llvm.minnum.f32(float %a, float 4.0)
		%fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
		ret float %fmed
		}

		define amdgpu_ps float @test_max_min_K1Val_K0_u32(float %a) {
		; GCN-LABEL: test_max_min_K1Val_K0_u32:
		; GCN: ; %bb.0:
		; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
		; GCN-NEXT: ; return to shader part epilog
		%minnum = call float @llvm.minnum.f32(float 4.0, float %a)
		%fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
		ret float %fmed
		}

		define amdgpu_ps float @test_max_K0min_ValK1__u32(float %a) {
		; GCN-LABEL: test_max_K0min_ValK1__u32:
		; GCN: ; %bb.0:
		; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
		; GCN-NEXT: ; return to shader part epilog
		%minnum = call float @llvm.minnum.f32(float %a, float 4.0)
		%fmed = call float @llvm.maxnum.f32(float 2.0, float %minnum)
		ret float %fmed
		}

		define amdgpu_ps float @test_max_K0min_K1Val__u32(float %a) {
		; GCN-LABEL: test_max_K0min_K1Val__u32:
		; GCN: ; %bb.0:
		; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
		; GCN-NEXT: ; return to shader part epilog
		%minnum = call float @llvm.minnum.f32(float 4.0, float %a)
		%fmed = call float @llvm.maxnum.f32(float 2.0, float %minnum)
		ret float %fmed
		}

declare i32 @llvm.amdgcn.workitem.id.x() #0		declare i32 @llvm.amdgcn.workitem.id.x() #0
declare float @llvm.fabs.f32(float) #0		declare float @llvm.fabs.f32(float) #0
declare float @llvm.minnum.f32(float, float) #0		declare float @llvm.minnum.f32(float, float) #0
declare float @llvm.maxnum.f32(float, float) #0		declare float @llvm.maxnum.f32(float, float) #0
declare double @llvm.minnum.f64(double, double) #0		declare double @llvm.minnum.f64(double, double) #0
declare double @llvm.maxnum.f64(double, double) #0		declare double @llvm.maxnum.f64(double, double) #0
declare half @llvm.fabs.f16(half) #0		declare half @llvm.fabs.f16(half) #0
declare half @llvm.minnum.f16(half, half) #0		declare half @llvm.minnum.f16(half, half) #0
declare half @llvm.maxnum.f16(half, half) #0		declare half @llvm.maxnum.f16(half, half) #0

attributes #0 = { nounwind readnone }		attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }		attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }
attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }		attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }

llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-fmed3.mir

This file was added.

				# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
				# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - \| FileCheck %s

				---
				name: test_min_max_ValK0_K1_u32
				legalized: true
				tracksRegLiveness: true
				machineFunctionInfo:
				mode:
				ieee: false
				body: \|
				bb.1:
				liveins: $vgpr0

				; CHECK-LABEL: name: test_min_max_ValK0_K1_u32
				; CHECK: liveins: $vgpr0
				; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
				; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
				; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
				; CHECK: [[AMDGPU_FMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_FMED3 [[COPY]], [[C]], [[C1]]
				; CHECK: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
				; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
				%0:_(s32) = COPY $vgpr0
				%3:_(s32) = G_FCONSTANT float 2.000000e+00
				%4:_(s32) = G_FMAXNUM %0, %3
				%5:_(s32) = G_FCONSTANT float 4.000000e+00
				%6:_(s32) = G_FMINNUM %4, %5
				$vgpr0 = COPY %6(s32)
				SI_RETURN_TO_EPILOG implicit $vgpr0
				...

				---
				name: test_non_inline_const
				legalized: true
				tracksRegLiveness: true
				machineFunctionInfo:
				mode:
				ieee: false
				body: \|
				bb.1:
				liveins: $vgpr0

				; CHECK-LABEL: name: test_non_inline_const
				; CHECK: liveins: $vgpr0
				; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
				; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
				; CHECK: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[C]]
				; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 8.000000e+00
				; CHECK: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[FMAXNUM]], [[C1]]
				; CHECK: $vgpr0 = COPY [[FMINNUM]](s32)
				; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
				%0:_(s32) = COPY $vgpr0
				%3:_(s32) = G_FCONSTANT float 2.000000e+00
				%4:_(s32) = G_FMAXNUM %0, %3
				%5:_(s32) = G_FCONSTANT float 8.000000e+00
				%6:_(s32) = G_FMINNUM %4, %5
				$vgpr0 = COPY %6(s32)
				SI_RETURN_TO_EPILOG implicit $vgpr0
				...

				---
				name: min_max_ValK0_K1_float
				legalized: true
				tracksRegLiveness: true
				machineFunctionInfo:
				mode:
				ieee: false
				body: \|
				bb.1:
				liveins: $vgpr0

				; CHECK-LABEL: name: min_max_ValK0_K1_float
				; CHECK: liveins: $vgpr0
				; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
				; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
				; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
				; CHECK: [[AMDGPU_FMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_FMED3 [[COPY]], [[C]], [[C1]]
				; CHECK: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
				; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
				%0:_(s32) = COPY $vgpr0
				%3:_(s32) = G_FCONSTANT float 2.000000e+00
				%4:_(s32) = G_FMAXNUM %3, %0
				%5:_(s32) = G_FCONSTANT float 4.000000e+00
				%6:_(s32) = G_FMINNUM %4, %5
				$vgpr0 = COPY %6(s32)
				SI_RETURN_TO_EPILOG implicit $vgpr0
				...

				---
				name: test_min_K1max_ValK0__u32
				legalized: true
				tracksRegLiveness: true
				machineFunctionInfo:
				mode:
				ieee: false
				body: \|
				bb.1:
				liveins: $vgpr0

				; CHECK-LABEL: name: test_min_K1max_ValK0__u32
				; CHECK: liveins: $vgpr0
				; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
				; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
				; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
				; CHECK: [[AMDGPU_FMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_FMED3 [[COPY]], [[C]], [[C1]]
				; CHECK: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
				; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
				%0:_(s32) = COPY $vgpr0
				%3:_(s32) = G_FCONSTANT float 2.000000e+00
				%4:_(s32) = G_FMAXNUM %0, %3
				%5:_(s32) = G_FCONSTANT float 4.000000e+00
				%6:_(s32) = G_FMINNUM %5, %4
				$vgpr0 = COPY %6(s32)
				SI_RETURN_TO_EPILOG implicit $vgpr0
				...

				---
				name: test_min_K1max_K0Val__u32
				legalized: true
				tracksRegLiveness: true
				machineFunctionInfo:
				mode:
				ieee: false
				body: \|
				bb.1:
				liveins: $vgpr0

				; CHECK-LABEL: name: test_min_K1max_K0Val__u32
				; CHECK: liveins: $vgpr0
				; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
				; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
				; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
				; CHECK: [[AMDGPU_FMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_FMED3 [[COPY]], [[C]], [[C1]]
				; CHECK: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
				; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
				%0:_(s32) = COPY $vgpr0
				%3:_(s32) = G_FCONSTANT float 2.000000e+00
				%4:_(s32) = G_FMAXNUM %3, %0
				%5:_(s32) = G_FCONSTANT float 4.000000e+00
				%6:_(s32) = G_FMINNUM %5, %4
				$vgpr0 = COPY %6(s32)
				SI_RETURN_TO_EPILOG implicit $vgpr0
				...

				---
				name: test_max_min_ValK1_K0_u32
				legalized: true
				tracksRegLiveness: true
				machineFunctionInfo:
				mode:
				ieee: false
				body: \|
				bb.1:
				liveins: $vgpr0

				; CHECK-LABEL: name: test_max_min_ValK1_K0_u32
				; CHECK: liveins: $vgpr0
				; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
				; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
				; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
				; CHECK: [[AMDGPU_FMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_FMED3 [[COPY]], [[C1]], [[C]]
				; CHECK: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
				; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
				%0:_(s32) = COPY $vgpr0
				%3:_(s32) = G_FCONSTANT float 4.000000e+00
				%4:_(s32) = G_FMINNUM %0, %3
				%5:_(s32) = G_FCONSTANT float 2.000000e+00
				%6:_(s32) = G_FMAXNUM %4, %5
				$vgpr0 = COPY %6(s32)
				SI_RETURN_TO_EPILOG implicit $vgpr0
				...

				---
				name: test_max_min_K1Val_K0_u32
				legalized: true
				tracksRegLiveness: true
				machineFunctionInfo:
				mode:
				ieee: false
				body: \|
				bb.1:
				liveins: $vgpr0

				; CHECK-LABEL: name: test_max_min_K1Val_K0_u32
				; CHECK: liveins: $vgpr0
				; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
				; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
				; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
				; CHECK: [[AMDGPU_FMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_FMED3 [[COPY]], [[C1]], [[C]]
				; CHECK: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
				; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
				%0:_(s32) = COPY $vgpr0
				%3:_(s32) = G_FCONSTANT float 4.000000e+00
				%4:_(s32) = G_FMINNUM %3, %0
				%5:_(s32) = G_FCONSTANT float 2.000000e+00
				%6:_(s32) = G_FMAXNUM %4, %5
				$vgpr0 = COPY %6(s32)
				SI_RETURN_TO_EPILOG implicit $vgpr0
				...

				---
				name: test_max_K0min_ValK1__u32
				legalized: true
				tracksRegLiveness: true
				machineFunctionInfo:
				mode:
				ieee: false
				body: \|
				bb.1:
				liveins: $vgpr0

				; CHECK-LABEL: name: test_max_K0min_ValK1__u32
				; CHECK: liveins: $vgpr0
				; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
				; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
				; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
				; CHECK: [[AMDGPU_FMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_FMED3 [[COPY]], [[C1]], [[C]]
				; CHECK: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
				; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
				%0:_(s32) = COPY $vgpr0
				%3:_(s32) = G_FCONSTANT float 4.000000e+00
				%4:_(s32) = G_FMINNUM %0, %3
				%5:_(s32) = G_FCONSTANT float 2.000000e+00
				%6:_(s32) = G_FMAXNUM %5, %4
				$vgpr0 = COPY %6(s32)
				SI_RETURN_TO_EPILOG implicit $vgpr0
				...

				---
				name: test_max_K0min_K1Val__u32
				legalized: true
				tracksRegLiveness: true
				machineFunctionInfo:
				mode:
				ieee: false
				body: \|
				bb.1:
				liveins: $vgpr0

				; CHECK-LABEL: name: test_max_K0min_K1Val__u32
				; CHECK: liveins: $vgpr0
				; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
				; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
				; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
				; CHECK: [[AMDGPU_FMED3_:%[0-9]+]]:_(s32) = G_AMDGPU_FMED3 [[COPY]], [[C1]], [[C]]
				; CHECK: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
				; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
				%0:_(s32) = COPY $vgpr0
				%3:_(s32) = G_FCONSTANT float 4.000000e+00
				%4:_(s32) = G_FMINNUM %3, %0
				%5:_(s32) = G_FCONSTANT float 2.000000e+00
				%6:_(s32) = G_FMAXNUM %5, %4
				$vgpr0 = COPY %6(s32)
				SI_RETURN_TO_EPILOG implicit $vgpr0
				...

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU/GlobalISel: Add floating point med3 combine
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 300311

llvm/lib/Target/AMDGPU/AMDGPUCombine.td

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

llvm/lib/Target/AMDGPU/SIInstructions.td

llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-fmed3.mir

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU/GlobalISel: Add floating point med3 combine
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 300311

llvm/lib/Target/AMDGPU/AMDGPUCombine.td

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

llvm/lib/Target/AMDGPU/SIInstructions.td

llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-fmed3.mir

AMDGPU/GlobalISel: Add floating point med3 combine
ClosedPublic