This is an archive of the discontinued LLVM Phabricator instance.

[GlobalISel] Remove scalar src from non-sequential fadd/fmul reductions.
ClosedPublic

Authored by aemerson on Oct 9 2020, 10:44 AM.

Download Raw Diff

Details

Reviewers

arsenm
paquette

Commits

rGc2551c1f4058: [GlobalISel] Remove scalar src from non-sequential fadd/fmul reductions.

Summary

It's probably better to split these into separate G_FADD/G_FMUL + G_VECREDUCE operations in the translator rather than carrying the scalar operand around on the reduction itself. The majority of the time the extra G_FADD/G_FMUL will get simplified away, since the scalar is usually the identity value for the operation.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

aemerson created this revision. · Oct 9 2020, 10:44 AM

Herald added subscribers: hiraditya, rovka. · View Herald Transcript · Oct 9 2020, 10:45 AM

aemerson requested review of this revision. · Oct 9 2020, 10:45 AM

Herald added a subscriber: wdng. · View Herald Transcript · Oct 9 2020, 10:45 AM

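Only do this for the relaxed-ordering (non-sequential) reductions. If we were to split the op for the sequential variants, the separate G_FADD would change where the scalar start value is combined with the vector elements, breaking the strict evaluation order that the sequential intrinsic requires.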

Harbormaster completed remote builds in B74617: Diff 297280. · Oct 9 2020, 11:18 AM

Harbormaster completed remote builds in B74619: Diff 297282. · Oct 9 2020, 11:32 AM

aemerson added a child revision: D89028: [GlobalISel] Add translation support for vector reduction intrinsics. · Oct 9 2020, 12:12 PM

ping

LGTM

This revision is now accepted and ready to land. · Oct 15 2020, 1:25 PM

This revision was landed with ongoing or failed builds. · Oct 15 2020, 4:02 PM

Closed by commit rGc2551c1f4058: [GlobalISel] Remove scalar src from non-sequential fadd/fmul reductions. (authored by aemerson). · Explain Why

This revision was automatically updated to reflect the committed changes.

aemerson added a commit: rGc2551c1f4058: [GlobalISel] Remove scalar src from non-sequential fadd/fmul reductions.

Revision Contents

Path

Size

llvm/

include/

llvm/

Target/

GenericOpcodes.td

21 lines

lib/

CodeGen/

MachineVerifier.cpp

10 lines

test/

CodeGen/

AArch64/

GlobalISel/

legalizer-info-validation.mir

4 lines

MachineVerifier/

test_vector_reductions.mir

4 lines

Diff 298498

llvm/include/llvm/Target/GenericOpcodes.td

Show First 20 Lines • Show All 1,259 Lines • ▼ Show 20 Lines	def G_SHUFFLE_VECTOR: GenericInstruction {
let InOperandList = (ins type1:$v1, type1:$v2, unknown:$mask);		let InOperandList = (ins type1:$v1, type1:$v2, unknown:$mask);
let hasSideEffects = 0;		let hasSideEffects = 0;
}		}

//------------------------------------------------------------------------------		//------------------------------------------------------------------------------
// Vector reductions		// Vector reductions
//------------------------------------------------------------------------------		//------------------------------------------------------------------------------

def G_VECREDUCE_SEQ_FADD : GenericInstruction {		class VectorReduction : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$acc, type2:$v);
let hasSideEffects = 0;
}

def G_VECREDUCE_SEQ_FMUL : GenericInstruction {
let OutOperandList = (outs type0:$dst);		let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$acc, type2:$v);		let InOperandList = (ins type1:$v);
let hasSideEffects = 0;		let hasSideEffects = 0;
}		}

def G_VECREDUCE_FADD : GenericInstruction {		def G_VECREDUCE_SEQ_FADD : GenericInstruction {
let OutOperandList = (outs type0:$dst);		let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$acc, type2:$v);		let InOperandList = (ins type1:$acc, type2:$v);
let hasSideEffects = 0;		let hasSideEffects = 0;
}		}

def G_VECREDUCE_FMUL : GenericInstruction {		def G_VECREDUCE_SEQ_FMUL : GenericInstruction {
let OutOperandList = (outs type0:$dst);		let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$acc, type2:$v);		let InOperandList = (ins type1:$acc, type2:$v);
let hasSideEffects = 0;		let hasSideEffects = 0;
}		}

class VectorReduction : GenericInstruction {		def G_VECREDUCE_FADD : VectorReduction;
let OutOperandList = (outs type0:$dst);		def G_VECREDUCE_FMUL : VectorReduction;
let InOperandList = (ins type1:$v);
let hasSideEffects = 0;
}

def G_VECREDUCE_FMAX : VectorReduction;		def G_VECREDUCE_FMAX : VectorReduction;
def G_VECREDUCE_FMIN : VectorReduction;		def G_VECREDUCE_FMIN : VectorReduction;

def G_VECREDUCE_ADD : VectorReduction;		def G_VECREDUCE_ADD : VectorReduction;
def G_VECREDUCE_MUL : VectorReduction;		def G_VECREDUCE_MUL : VectorReduction;
def G_VECREDUCE_AND : VectorReduction;		def G_VECREDUCE_AND : VectorReduction;
def G_VECREDUCE_OR : VectorReduction;		def G_VECREDUCE_OR : VectorReduction;
▲ Show 20 Lines • Show All 44 Lines • Show Last 20 Lines

llvm/lib/CodeGen/MachineVerifier.cpp

Show First 20 Lines • Show All 1,483 Lines • ▼ Show 20 Lines	case TargetOpcode::G_MEMSET: {
}		}

if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace())		if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace())
report("inconsistent memset address space", MI);		report("inconsistent memset address space", MI);

break;		break;
}		}
case TargetOpcode::G_VECREDUCE_SEQ_FADD:		case TargetOpcode::G_VECREDUCE_SEQ_FADD:
case TargetOpcode::G_VECREDUCE_SEQ_FMUL:		case TargetOpcode::G_VECREDUCE_SEQ_FMUL: {
case TargetOpcode::G_VECREDUCE_FADD:
case TargetOpcode::G_VECREDUCE_FMUL: {
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());		LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
LLT Src1Ty = MRI->getType(MI->getOperand(1).getReg());		LLT Src1Ty = MRI->getType(MI->getOperand(1).getReg());
LLT Src2Ty = MRI->getType(MI->getOperand(2).getReg());		LLT Src2Ty = MRI->getType(MI->getOperand(2).getReg());
if (!DstTy.isScalar())		if (!DstTy.isScalar())
report("Vector reduction requires a scalar destination type", MI);		report("Vector reduction requires a scalar destination type", MI);
if (!Src1Ty.isScalar())		if (!Src1Ty.isScalar())
report("FADD/FMUL vector reduction requires a scalar 1st operand", MI);		report("Sequential FADD/FMUL vector reduction requires a scalar 1st operand", MI);
if (!Src2Ty.isVector())		if (!Src2Ty.isVector())
report("FADD/FMUL vector reduction must have a vector 2nd operand", MI);		report("Sequential FADD/FMUL vector reduction must have a vector 2nd operand", MI);
break;		break;
}		}
		case TargetOpcode::G_VECREDUCE_FADD:
		case TargetOpcode::G_VECREDUCE_FMUL:
case TargetOpcode::G_VECREDUCE_FMAX:		case TargetOpcode::G_VECREDUCE_FMAX:
case TargetOpcode::G_VECREDUCE_FMIN:		case TargetOpcode::G_VECREDUCE_FMIN:
case TargetOpcode::G_VECREDUCE_ADD:		case TargetOpcode::G_VECREDUCE_ADD:
case TargetOpcode::G_VECREDUCE_MUL:		case TargetOpcode::G_VECREDUCE_MUL:
case TargetOpcode::G_VECREDUCE_AND:		case TargetOpcode::G_VECREDUCE_AND:
case TargetOpcode::G_VECREDUCE_OR:		case TargetOpcode::G_VECREDUCE_OR:
case TargetOpcode::G_VECREDUCE_XOR:		case TargetOpcode::G_VECREDUCE_XOR:
case TargetOpcode::G_VECREDUCE_SMAX:		case TargetOpcode::G_VECREDUCE_SMAX:
▲ Show 20 Lines • Show All 1,559 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

	Show First 20 Lines • Show All 613 Lines • ▼ Show 20 Lines
	# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected			# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
	# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected			# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
	# DEBUG-NEXT: G_VECREDUCE_SEQ_FADD (opcode {{[0-9]+}}): 3 type indices, 0 imm indices			# DEBUG-NEXT: G_VECREDUCE_SEQ_FADD (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
	# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined			# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
	# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined			# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
	# DEBUG-NEXT: G_VECREDUCE_SEQ_FMUL (opcode {{[0-9]+}}): 3 type indices, 0 imm indices			# DEBUG-NEXT: G_VECREDUCE_SEQ_FMUL (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
	# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined			# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
	# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined			# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
	# DEBUG-NEXT: G_VECREDUCE_FADD (opcode {{[0-9]+}}): 3 type indices, 0 imm indices			# DEBUG-NEXT: G_VECREDUCE_FADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
	# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined			# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
	# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined			# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
	# DEBUG-NEXT: G_VECREDUCE_FMUL (opcode {{[0-9]+}}): 3 type indices, 0 imm indices			# DEBUG-NEXT: G_VECREDUCE_FMUL (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
	# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined			# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
	# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined			# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
	# DEBUG-NEXT: G_VECREDUCE_FMAX (opcode {{[0-9]+}}): 2 type indices, 0 imm indices			# DEBUG-NEXT: G_VECREDUCE_FMAX (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
	# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined			# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
	# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined			# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
	# DEBUG-NEXT: G_VECREDUCE_FMIN (opcode {{[0-9]+}}): 2 type indices, 0 imm indices			# DEBUG-NEXT: G_VECREDUCE_FMIN (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
	# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined			# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
	# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined			# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
	Show All 35 Lines

llvm/test/MachineVerifier/test_vector_reductions.mir

Show All 19 Lines	bb.0:

%vec_v2s64:_(<2 x s64>) = IMPLICIT_DEF		%vec_v2s64:_(<2 x s64>) = IMPLICIT_DEF
%scalar_s64:_(s64) = IMPLICIT_DEF		%scalar_s64:_(s64) = IMPLICIT_DEF

%seq_fadd:_(<2 x s64>) = G_VECREDUCE_SEQ_FADD %scalar_s64, %vec_v2s64		%seq_fadd:_(<2 x s64>) = G_VECREDUCE_SEQ_FADD %scalar_s64, %vec_v2s64
; CHECK: Bad machine code: Vector reduction requires a scalar destination type		; CHECK: Bad machine code: Vector reduction requires a scalar destination type

%dst:_(s64) = G_VECREDUCE_SEQ_FADD %vec_v2s64, %vec_v2s64		%dst:_(s64) = G_VECREDUCE_SEQ_FADD %vec_v2s64, %vec_v2s64
; CHECK: Bad machine code: FADD/FMUL vector reduction requires a scalar 1st operand		; CHECK: Bad machine code: Sequential FADD/FMUL vector reduction requires a scalar 1st operand

%dst:_(s64) = G_VECREDUCE_SEQ_FADD %scalar_s64, %scalar_s64		%dst:_(s64) = G_VECREDUCE_SEQ_FADD %scalar_s64, %scalar_s64
; CHECK: Bad machine code: FADD/FMUL vector reduction must have a vector 2nd operand		; CHECK: Bad machine code: Sequential FADD/FMUL vector reduction must have a vector 2nd operand

%dst2:_(s64) = G_VECREDUCE_MUL %scalar_s64		%dst2:_(s64) = G_VECREDUCE_MUL %scalar_s64
; CHECK: Bad machine code: Vector reduction requires vector source		; CHECK: Bad machine code: Vector reduction requires vector source
...		...