diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst
--- a/llvm/docs/GlobalISel/GenericOpcode.rst
+++ b/llvm/docs/GlobalISel/GenericOpcode.rst
@@ -545,6 +545,57 @@
 The mask operand should be an IR Constant which exactly matches the
 corresponding mask for the IR shufflevector instruction.
 
+Vector Reduction Operations
+---------------------------
+
+These operations represent horizontal vector reductions, producing a scalar result.
+
+G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The SEQ variants perform reductions in sequential order. The first operand is
+an initial scalar accumulator value, and the second operand is the vector to reduce.
+
+G_VECREDUCE_FADD, G_VECREDUCE_FMUL
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+These reductions are relaxed variants which may reduce the elements in any
+order. Like the SEQ variants, they take a scalar accumulator operand followed
+by the vector to reduce.
+
+G_VECREDUCE_FMAX, G_VECREDUCE_FMIN
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+FMIN/FMAX operations can carry flags to select between the NaN and NoNaN variants.
+
+Integer/bitwise reductions
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* G_VECREDUCE_ADD
+* G_VECREDUCE_MUL
+* G_VECREDUCE_AND
+* G_VECREDUCE_OR
+* G_VECREDUCE_XOR
+* G_VECREDUCE_SMAX
+* G_VECREDUCE_SMIN
+* G_VECREDUCE_UMAX
+* G_VECREDUCE_UMIN
+
+Integer reductions may have a result type larger than the vector element type.
+However, the reduction is performed using the vector element type and the value
+in the top bits is unspecified.
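+
+For example, a sketch of the expected MIR (the register names are
+illustrative):
+
+.. code-block:: none
+
+  %res:_(s64) = G_VECREDUCE_SEQ_FADD %acc(s64), %v(<2 x s64>)
+  %sum:_(s32) = G_VECREDUCE_ADD %bytes(<16 x s8>)
+
+In the second example the s32 result is wider than the s8 element type, so
+only the low 8 bits of %sum are defined.
+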
 Memory Operations
 -----------------
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -18,6 +18,7 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Module.h"
@@ -1679,6 +1680,108 @@
   /// \return a MachineInstrBuilder for the newly created instruction.
   MachineInstrBuilder buildJumpTable(const LLT PtrTy, unsigned JTI);
 
+  /// Build and insert \p Res = G_VECREDUCE_SEQ_FADD \p ScalarIn, \p VecIn
+  ///
+  /// \p ScalarIn is the scalar accumulator input to start the sequential
+  /// reduction operation of \p VecIn.
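+  ///
+  /// For example, a sketch (builder, types, and value names illustrative):
+  /// \code
+  ///   auto Sum = MIB.buildVecReduceSeqFAdd(LLT::scalar(64), Acc, Vec);
+  /// \endcode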
+  MachineInstrBuilder buildVecReduceSeqFAdd(const DstOp &Dst,
+                                            const SrcOp &ScalarIn,
+                                            const SrcOp &VecIn) {
+    return buildInstr(TargetOpcode::G_VECREDUCE_SEQ_FADD, {Dst},
+                      {ScalarIn, VecIn});
+  }
+
+  /// Build and insert \p Res = G_VECREDUCE_SEQ_FMUL \p ScalarIn, \p VecIn
+  ///
+  /// \p ScalarIn is the scalar accumulator input to start the sequential
+  /// reduction operation of \p VecIn.
+  MachineInstrBuilder buildVecReduceSeqFMul(const DstOp &Dst,
+                                            const SrcOp &ScalarIn,
+                                            const SrcOp &VecIn) {
+    return buildInstr(TargetOpcode::G_VECREDUCE_SEQ_FMUL, {Dst},
+                      {ScalarIn, VecIn});
+  }
+
+  /// Build and insert \p Res = G_VECREDUCE_FADD \p ScalarIn, \p VecIn
+  ///
+  /// \p ScalarIn is the scalar accumulator input to the reduction operation of
+  /// \p VecIn.
+  MachineInstrBuilder buildVecReduceFAdd(const DstOp &Dst,
+                                         const SrcOp &ScalarIn,
+                                         const SrcOp &VecIn) {
+    return buildInstr(TargetOpcode::G_VECREDUCE_FADD, {Dst}, {ScalarIn, VecIn});
+  }
+
+  /// Build and insert \p Res = G_VECREDUCE_FMUL \p ScalarIn, \p VecIn
+  ///
+  /// \p ScalarIn is the scalar accumulator input to the reduction operation of
+  /// \p VecIn.
+  MachineInstrBuilder buildVecReduceFMul(const DstOp &Dst,
+                                         const SrcOp &ScalarIn,
+                                         const SrcOp &VecIn) {
+    return buildInstr(TargetOpcode::G_VECREDUCE_FMUL, {Dst}, {ScalarIn, VecIn});
+  }
+
+  /// Build and insert \p Res = G_VECREDUCE_FMAX \p Src
+  MachineInstrBuilder buildVecReduceFMax(const DstOp &Dst, const SrcOp &Src) {
+    return buildInstr(TargetOpcode::G_VECREDUCE_FMAX, {Dst}, {Src});
+  }
+
+  /// Build and insert \p Res = G_VECREDUCE_FMIN \p Src
+  MachineInstrBuilder buildVecReduceFMin(const DstOp &Dst, const SrcOp &Src) {
+    return buildInstr(TargetOpcode::G_VECREDUCE_FMIN, {Dst}, {Src});
+  }
+
+  /// Build and insert \p Res = G_VECREDUCE_ADD \p Src
+  MachineInstrBuilder buildVecReduceAdd(const DstOp &Dst, const SrcOp &Src) {
+    return buildInstr(TargetOpcode::G_VECREDUCE_ADD, {Dst}, {Src});
+  }
+
+  /// Build and insert \p Res = G_VECREDUCE_MUL \p Src
+  MachineInstrBuilder buildVecReduceMul(const DstOp &Dst, const SrcOp &Src) {
+    return buildInstr(TargetOpcode::G_VECREDUCE_MUL, {Dst}, {Src});
+  }
+
+  /// Build and insert \p Res = G_VECREDUCE_AND \p Src
+  MachineInstrBuilder buildVecReduceAnd(const DstOp &Dst, const SrcOp &Src) {
+    return buildInstr(TargetOpcode::G_VECREDUCE_AND, {Dst}, {Src});
+  }
+
+  /// Build and insert \p Res = G_VECREDUCE_OR \p Src
+  MachineInstrBuilder buildVecReduceOr(const DstOp &Dst, const SrcOp &Src) {
+    return buildInstr(TargetOpcode::G_VECREDUCE_OR, {Dst}, {Src});
+  }
+
+  /// Build and insert \p Res = G_VECREDUCE_XOR \p Src
+  MachineInstrBuilder buildVecReduceXor(const DstOp &Dst, const SrcOp &Src) {
+    return buildInstr(TargetOpcode::G_VECREDUCE_XOR, {Dst}, {Src});
+  }
+
+  /// Build and insert \p Res = G_VECREDUCE_SMAX \p Src
+  MachineInstrBuilder buildVecReduceSMax(const DstOp &Dst, const SrcOp &Src) {
+    return buildInstr(TargetOpcode::G_VECREDUCE_SMAX, {Dst}, {Src});
+  }
+
+  /// Build and insert \p Res = G_VECREDUCE_SMIN \p Src
+  MachineInstrBuilder buildVecReduceSMin(const DstOp &Dst, const SrcOp &Src) {
+    return buildInstr(TargetOpcode::G_VECREDUCE_SMIN, {Dst}, {Src});
+  }
+
+  /// Build and insert \p Res = G_VECREDUCE_UMAX \p Src
+  MachineInstrBuilder buildVecReduceUMax(const DstOp &Dst, const SrcOp &Src) {
+    return buildInstr(TargetOpcode::G_VECREDUCE_UMAX, {Dst}, {Src});
+  }
+
+  /// Build and insert \p Res = G_VECREDUCE_UMIN \p Src
+  MachineInstrBuilder buildVecReduceUMin(const DstOp &Dst, const SrcOp &Src) {
+    return buildInstr(TargetOpcode::G_VECREDUCE_UMIN, {Dst}, {Src});
+  }
+
   virtual MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
                                          ArrayRef<SrcOp> SrcOps,
                                          Optional<unsigned> Flags = None);
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -710,10 +710,27 @@
 /// llvm.memset intrinsic
 HANDLE_TARGET_OPCODE(G_MEMSET)
 
+/// Vector reductions
+HANDLE_TARGET_OPCODE(G_VECREDUCE_SEQ_FADD)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_SEQ_FMUL)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_FADD)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_FMUL)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_FMAX)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_FMIN)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_ADD)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_MUL)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_AND)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_OR)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_XOR)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_SMAX)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_SMIN)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_UMAX)
+HANDLE_TARGET_OPCODE(G_VECREDUCE_UMIN)
+
 /// Marker for the end of the generic opcode.
 /// This is used to check if an opcode is in the range of the
 /// generic opcodes.
-HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_MEMSET)
+HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_VECREDUCE_UMIN)
 
 /// BUILTIN_OP_END - This must be the last enum value in this list.
 /// The target-specific post-isel opcode values start here.
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -1261,6 +1261,55 @@
   let hasSideEffects = 0;
 }
 
+//------------------------------------------------------------------------------
+// Vector reductions
+//------------------------------------------------------------------------------
+
+def G_VECREDUCE_SEQ_FADD : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type1:$acc, type2:$v);
+  let hasSideEffects = 0;
+}
+
+def G_VECREDUCE_SEQ_FMUL : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type1:$acc, type2:$v);
+  let hasSideEffects = 0;
+}
+
+def G_VECREDUCE_FADD : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type1:$acc, type2:$v);
+  let hasSideEffects = 0;
+}
+
+def G_VECREDUCE_FMUL : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type1:$acc, type2:$v);
+  let hasSideEffects = 0;
+}
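+
+// Unlike the FADD/FMUL reductions above, the reductions below take no
+// accumulator operand; their only input is the vector to reduce.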
+class VectorReduction : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type1:$v);
+  let hasSideEffects = 0;
+}
+
+def G_VECREDUCE_FMAX : VectorReduction;
+def G_VECREDUCE_FMIN : VectorReduction;
+
+def G_VECREDUCE_ADD : VectorReduction;
+def G_VECREDUCE_MUL : VectorReduction;
+def G_VECREDUCE_AND : VectorReduction;
+def G_VECREDUCE_OR : VectorReduction;
+def G_VECREDUCE_XOR : VectorReduction;
+def G_VECREDUCE_SMAX : VectorReduction;
+def G_VECREDUCE_SMIN : VectorReduction;
+def G_VECREDUCE_UMAX : VectorReduction;
+def G_VECREDUCE_UMIN : VectorReduction;
+
 //------------------------------------------------------------------------------
 // Constrained floating point ops
 //------------------------------------------------------------------------------
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1488,6 +1488,40 @@
     break;
   }
 
+  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
+  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
+  case TargetOpcode::G_VECREDUCE_FADD:
+  case TargetOpcode::G_VECREDUCE_FMUL: {
+    LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+    LLT Src1Ty = MRI->getType(MI->getOperand(1).getReg());
+    LLT Src2Ty = MRI->getType(MI->getOperand(2).getReg());
+    if (!DstTy.isScalar())
+      report("Vector reduction requires a scalar destination type", MI);
+    if (!Src1Ty.isScalar())
+      report("FADD/FMUL vector reduction requires a scalar 1st operand", MI);
+    if (!Src2Ty.isVector())
+      report("FADD/FMUL vector reduction must have a vector 2nd operand", MI);
+    break;
+  }
+  case TargetOpcode::G_VECREDUCE_FMAX:
+  case TargetOpcode::G_VECREDUCE_FMIN:
+  case TargetOpcode::G_VECREDUCE_ADD:
+  case TargetOpcode::G_VECREDUCE_MUL:
+  case TargetOpcode::G_VECREDUCE_AND:
+  case TargetOpcode::G_VECREDUCE_OR:
+  case TargetOpcode::G_VECREDUCE_XOR:
+  case TargetOpcode::G_VECREDUCE_SMAX:
+  case TargetOpcode::G_VECREDUCE_SMIN:
+  case TargetOpcode::G_VECREDUCE_UMAX:
+  case TargetOpcode::G_VECREDUCE_UMIN: {
+    LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+    LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+    if (!DstTy.isScalar())
+      report("Vector reduction requires a scalar destination type", MI);
+    if (!SrcTy.isVector())
+      report("Vector reduction requires vector source", MI);
+    break;
+  }
   default:
     break;
   }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -613,6 +613,51 @@
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_VECREDUCE_SEQ_FADD (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_VECREDUCE_SEQ_FMUL (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_VECREDUCE_FADD (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_VECREDUCE_FMUL (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_VECREDUCE_FMAX (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_VECREDUCE_FMIN (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_VECREDUCE_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_VECREDUCE_MUL (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_VECREDUCE_AND (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_VECREDUCE_OR (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_VECREDUCE_XOR (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_VECREDUCE_SMAX (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_VECREDUCE_SMIN (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_VECREDUCE_UMAX (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_VECREDUCE_UMIN (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
 
 # CHECK-NOT: ill-defined
 
diff --git a/llvm/test/MachineVerifier/test_vector_reductions.mir b/llvm/test/MachineVerifier/test_vector_reductions.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/MachineVerifier/test_vector_reductions.mir
@@ -0,0 +1,39 @@
+# RUN: not --crash llc -o - -global-isel -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s
+# REQUIRES: aarch64-registered-target
+--- |
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64-unknown-unknown"
+
+  define i32 @vector_reductions() {
+    ret i32 0
+  }
+
+...
+---
+name: vector_reductions
+legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+  bb.0:
+
+    %vec_v2s64:_(<2 x s64>) = IMPLICIT_DEF
+    %scalar_s64:_(s64) = IMPLICIT_DEF
+
+    %seq_fadd:_(<2 x s64>) = G_VECREDUCE_SEQ_FADD %scalar_s64, %vec_v2s64
+    ; CHECK: Bad machine code: Vector reduction requires a scalar destination type
+
+    %dst:_(s64) = G_VECREDUCE_SEQ_FADD %vec_v2s64, %vec_v2s64
+    ; CHECK: Bad machine code: FADD/FMUL vector reduction requires a scalar 1st operand
+
+    %dst1:_(s64) = G_VECREDUCE_SEQ_FADD %scalar_s64, %scalar_s64
+    ; CHECK: Bad machine code: FADD/FMUL vector reduction must have a vector 2nd operand
+
+    %dst2:_(s64) = G_VECREDUCE_MUL %scalar_s64
+    ; CHECK: Bad machine code: Vector reduction requires vector source
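+
+    ; Single-operand reductions must also produce a scalar result.
+    %dst3:_(<2 x s64>) = G_VECREDUCE_ADD %vec_v2s64
+    ; CHECK: Bad machine code: Vector reduction requires a scalar destination type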
+...