Index: llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -164,6 +164,15 @@ GIM_CheckMemorySizeEqualToLLT, GIM_CheckMemorySizeLessThanLLT, GIM_CheckMemorySizeGreaterThanLLT, + + /// Check if this is a vector that can be treated as a vector splat + /// constant. This is valid for both G_BUILD_VECTOR and + /// G_BUILD_VECTOR_TRUNC. AllOnes refers to individual bits, i.e. a -1 + /// element. + /// - InsnID - Instruction ID + GIM_CheckIsBuildVectorAllOnes, + GIM_CheckIsBuildVectorAllZeros, + /// Check a generic C++ instruction predicate /// - InsnID - Instruction ID /// - PredicateID - The ID of the predicate function to call Index: llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -321,6 +321,35 @@ return false; break; } + case GIM_CheckIsBuildVectorAllOnes: + case GIM_CheckIsBuildVectorAllZeros: { + int64_t InsnID = MatchTable[CurrentIdx++]; + + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx + << ": GIM_CheckBuildVectorAll{Zeros|Ones}(MIs[" + << InsnID << "])\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + const MachineInstr *MI = State.MIs[InsnID]; + assert((MI->getOpcode() == TargetOpcode::G_BUILD_VECTOR || + MI->getOpcode() == TargetOpcode::G_BUILD_VECTOR_TRUNC) && + "Expected G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC"); + + if (MatcherOpcode == GIM_CheckIsBuildVectorAllOnes) { + if (!isBuildVectorAllOnes(*MI, MRI)) { + if (handleReject() == RejectAndGiveUp) + return false; + } + } else { + if (!isBuildVectorAllZeros(*MI, MRI)) { + if (handleReject() == RejectAndGiveUp) + return false; 
+ } + } + + break; + } case GIM_CheckCxxInsnPredicate: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t Predicate = MatchTable[CurrentIdx++]; Index: llvm/include/llvm/CodeGen/GlobalISel/Utils.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -217,5 +217,15 @@ /// If \p MI is not a splat, returns None. Optional getSplatIndex(MachineInstr &MI); +/// Return true if the specified instruction is a G_BUILD_VECTOR or +/// G_BUILD_VECTOR_TRUNC whose elements are all 0 (undef is not yet handled). +bool isBuildVectorAllZeros(const MachineInstr &MI, + const MachineRegisterInfo &MRI); + +/// Return true if the specified instruction is a G_BUILD_VECTOR or +/// G_BUILD_VECTOR_TRUNC whose elements are all ~0 (undef is not yet handled). +bool isBuildVectorAllOnes(const MachineInstr &MI, + const MachineRegisterInfo &MRI); + } // End namespace llvm. #endif Index: llvm/lib/CodeGen/GlobalISel/Utils.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -27,6 +28,7 @@ #define DEBUG_TYPE "globalisel-utils" using namespace llvm; +using namespace MIPatternMatch; Register llvm::constrainRegToClass(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, @@ -625,3 +627,37 @@ return SplatValue; } + +static bool isBuildVectorOp(unsigned Opcode) { + return Opcode == TargetOpcode::G_BUILD_VECTOR || + Opcode == TargetOpcode::G_BUILD_VECTOR_TRUNC; +} + +// TODO: Handle mixed undef elements. 
+static bool isBuildVectorConstantSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + int64_t SplatValue) { + if (!isBuildVectorOp(MI.getOpcode())) + return false; + + const unsigned NumOps = MI.getNumOperands(); + for (unsigned I = 1; I != NumOps; ++I) { + Register Element = MI.getOperand(I).getReg(); + int64_t ElementValue; + if (!mi_match(Element, MRI, m_ICst(ElementValue)) || + ElementValue != SplatValue) + return false; + } + + return true; +} + +bool llvm::isBuildVectorAllZeros(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + return isBuildVectorConstantSplat(MI, MRI, 0); +} + +bool llvm::isBuildVectorAllOnes(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + return isBuildVectorConstantSplat(MI, MRI, -1); +} Index: llvm/test/CodeGen/AArch64/arm64-vabs.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -956,8 +956,8 @@ ; DAG: abs.2s ; DAG-NEXT: ret +; GISEL: neg.2s ; GISEL: cmge.2s -; GISEL: sub.2s ; GISEL: fcsel ; GISEL: fcsel %tmp1neg = sub <2 x i32> zeroinitializer, %a @@ -1038,8 +1038,8 @@ ; DAG: abs.2d ; DAG-NEXT: ret +; GISEL: neg.2d ; GISEL: cmge.2d -; GISEL: sub.2d ; GISEL: fcsel ; GISEL: fcsel %tmp1neg = sub <2 x i64> zeroinitializer, %a Index: llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll @@ -178,10 +178,7 @@ define amdgpu_ps <2 x i32> @s_andn2_v2i32(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { ; GCN-LABEL: s_andn2_v2i32: ; GCN: ; %bb.0: -; GCN-NEXT: s_mov_b32 s0, -1 -; GCN-NEXT: s_mov_b32 s1, s0 -; GCN-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] -; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] +; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5] ; GCN-NEXT: ; return to shader part epilog %not.src1 = xor <2 x i32> %src1, %and = and <2 x i32> %src0, 
%not.src1 @@ -191,10 +188,7 @@ define amdgpu_ps <2 x i32> @s_andn2_v2i32_commute(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { ; GCN-LABEL: s_andn2_v2i32_commute: ; GCN: ; %bb.0: -; GCN-NEXT: s_mov_b32 s0, -1 -; GCN-NEXT: s_mov_b32 s1, s0 -; GCN-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] -; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] +; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5] ; GCN-NEXT: ; return to shader part epilog %not.src1 = xor <2 x i32> %src1, %and = and <2 x i32> %not.src1, %src0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll @@ -178,10 +178,7 @@ define amdgpu_ps <2 x i32> @s_orn2_v2i32(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { ; GCN-LABEL: s_orn2_v2i32: ; GCN: ; %bb.0: -; GCN-NEXT: s_mov_b32 s0, -1 -; GCN-NEXT: s_mov_b32 s1, s0 -; GCN-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] -; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; GCN-NEXT: s_orn2_b64 s[0:1], s[2:3], s[4:5] ; GCN-NEXT: ; return to shader part epilog %not.src1 = xor <2 x i32> %src1, %or = or <2 x i32> %src0, %not.src1 @@ -191,10 +188,7 @@ define amdgpu_ps <2 x i32> @s_orn2_v2i32_commute(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { ; GCN-LABEL: s_orn2_v2i32_commute: ; GCN: ; %bb.0: -; GCN-NEXT: s_mov_b32 s0, -1 -; GCN-NEXT: s_mov_b32 s1, s0 -; GCN-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] -; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GCN-NEXT: s_orn2_b64 s[0:1], s[2:3], s[4:5] ; GCN-NEXT: ; return to shader part epilog %not.src1 = xor <2 x i32> %src1, %or = or <2 x i32> %not.src1, %src0 Index: llvm/test/TableGen/Common/GlobalISelEmitterCommon.td =================================================================== --- llvm/test/TableGen/Common/GlobalISelEmitterCommon.td +++ llvm/test/TableGen/Common/GlobalISelEmitterCommon.td @@ -15,6 +15,10 @@ def B0 : Register<"b0"> { let Namespace = "MyTarget"; } def GPR8 : 
RegisterClass<"MyTarget", [i8], 8, (add B0)>; + +def V0 : Register<"v0"> { let Namespace = "MyTarget"; } +def VecReg128 : RegisterClass<"MyTarget", [v4i32], 128, (add V0)>; + def p0 : PtrValueType ; class I Pat> Index: llvm/test/TableGen/GlobalISelEmitter-immAllZeroOne.td =================================================================== --- /dev/null +++ llvm/test/TableGen/GlobalISelEmitter-immAllZeroOne.td @@ -0,0 +1,45 @@ +// RUN: llvm-tblgen -gen-global-isel -warn-on-skipped-patterns -optimize-match-table=false -I %p/../../include -I %p/Common %s -o - | FileCheck -check-prefixes=GISEL-NOOPT,GISEL %s +// RUN: llvm-tblgen -gen-global-isel -warn-on-skipped-patterns -optimize-match-table=true -I %p/../../include -I %p/Common %s -o - | FileCheck -check-prefixes=GISEL-OPT,GISEL %s + +include "llvm/Target/Target.td" +include "GlobalISelEmitterCommon.td" + + +// GISEL-OPT: GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_v4s16, +// GISEL-OPT: GIM_CheckOpcode, /*MI*/1, TargetOpcode::G_BUILD_VECTOR, +// GISEL-OPT: GIM_CheckIsBuildVectorAllZeros, /*MI*/1, + +// GISEL-OPT: GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_v4s16, +// GISEL-OPT: GIM_CheckOpcode, /*MI*/1, TargetOpcode::G_BUILD_VECTOR, +// GISEL-OPT: GIM_CheckIsBuildVectorAllOnes, /*MI*/1, + + + +// GISEL-NOOPT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_LSHR, +// GISEL-NOOPT: // MIs[0] Operand 2 +// GISEL-NOOPT-NEXT: GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_v4s16, +// GISEL-NOOPT-NEXT: GIM_RecordInsn, /*DefineMI*/1, /*MI*/0, /*OpIdx*/2, // MIs[1] +// GISEL-NOOPT-NEXT: GIM_CheckOpcode, /*MI*/1, TargetOpcode::G_BUILD_VECTOR, +// GISEL-NOOPT-NEXT: GIM_CheckIsBuildVectorAllOnes, /*MI*/1, +// GISEL-NOOPT-NEXT: // MIs[1] Operand 0 +// GISEL-NOOPT-NEXT: GIM_CheckType, /*MI*/1, /*Op*/0, /*Type*/GILLT_v4s16, +// GISEL-NOOPT-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/1, +// GISEL-NOOPT-NEXT: // (srl:{ *:[v4i32] } v4i32:{ *:[v4i32] }:$src0, immAllOnesV:{ *:[v4i16] }) => (VFOOONES:{ *:[v4i32] } v4i32:{ *:[v4i32] }:$src0) 
+def VFOOONES : I<(outs VecReg128:$dst), (ins VecReg128:$src0), + [(set v4i32:$dst, (srl v4i32:$src0, (v4i16 immAllOnesV)))] +>; + + +// GISEL-NOOPT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_SHL, +// GISEL-NOOPT: // MIs[0] Operand 2 +// GISEL-NOOPT-NEXT: GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_v4s16, +// GISEL-NOOPT-NEXT: GIM_RecordInsn, /*DefineMI*/1, /*MI*/0, /*OpIdx*/2, // MIs[1] +// GISEL-NOOPT-NEXT: GIM_CheckOpcode, /*MI*/1, TargetOpcode::G_BUILD_VECTOR, +// GISEL-NOOPT-NEXT: GIM_CheckIsBuildVectorAllZeros, /*MI*/1, +// GISEL-NOOPT-NEXT: // MIs[1] Operand 0 +// GISEL-NOOPT-NEXT: GIM_CheckType, /*MI*/1, /*Op*/0, /*Type*/GILLT_v4s16, +// GISEL-NOOPT-NEXT: GIM_CheckIsSafeToFold, /*InsnID*/1, +// GISEL-NOOPT-NEXT: // (shl:{ *:[v4i32] } v4i32:{ *:[v4i32] }:$src0, immAllZerosV:{ *:[v4i16] }) => (VFOOZERO:{ *:[v4i32] } v4i32:{ *:[v4i32] }:$src0) +def VFOOZERO : I<(outs VecReg128:$dst), (ins VecReg128:$src0), + [(set v4i32:$dst, (shl v4i32:$src0, (v4i16 immAllZerosV)))] +>; Index: llvm/utils/TableGen/GlobalISelEmitter.cpp =================================================================== --- llvm/utils/TableGen/GlobalISelEmitter.cpp +++ llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -1089,6 +1089,7 @@ IPM_MemoryVsLLTSize, IPM_MemoryAddressSpace, IPM_MemoryAlignment, + IPM_VectorSplatImm, IPM_GenericPredicate, OPM_SameOperand, OPM_ComplexPattern, @@ -2021,6 +2022,42 @@ } }; +// Matcher for immAllOnesV/immAllZerosV +class VectorSplatImmPredicateMatcher : public InstructionPredicateMatcher { +public: + enum SplatKind { + AllZeros, + AllOnes + }; + +private: + SplatKind Kind; + +public: + VectorSplatImmPredicateMatcher(unsigned InsnVarID, SplatKind K) + : InstructionPredicateMatcher(IPM_VectorSplatImm, InsnVarID), Kind(K) {} + + static bool classof(const PredicateMatcher *P) { + return P->getKind() == IPM_VectorSplatImm; + } + + bool isIdentical(const PredicateMatcher &B) const override { + return InstructionPredicateMatcher::isIdentical(B) && + Kind == 
static_cast(B).Kind; + } + + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + if (Kind == AllOnes) + Table << MatchTable::Opcode("GIM_CheckIsBuildVectorAllOnes"); + else + Table << MatchTable::Opcode("GIM_CheckIsBuildVectorAllZeros"); + + Table << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID); + Table << MatchTable::LineBreak; + } +}; + /// Generates code to check an arbitrary C++ instruction predicate. class GenericInstructionPredicateMatcher : public InstructionPredicateMatcher { protected: @@ -2077,8 +2114,9 @@ SmallVector, 2> PhysRegInputs; public: - InstructionMatcher(RuleMatcher &Rule, StringRef SymbolicName) - : Rule(Rule), SymbolicName(SymbolicName) { + InstructionMatcher(RuleMatcher &Rule, StringRef SymbolicName, + bool NumOpsCheck = true) + : Rule(Rule), NumOperandsCheck(NumOpsCheck), SymbolicName(SymbolicName) { // We create a new instruction matcher. // Get a new ID for that instruction. InsnVarID = Rule.implicitlyDefineInsnVar(*this); @@ -2267,9 +2305,10 @@ public: InstructionOperandMatcher(unsigned InsnVarID, unsigned OpIdx, - RuleMatcher &Rule, StringRef SymbolicName) + RuleMatcher &Rule, StringRef SymbolicName, + bool NumOpsCheck = true) : OperandPredicateMatcher(OPM_Instruction, InsnVarID, OpIdx), - InsnMatcher(new InstructionMatcher(Rule, SymbolicName)) {} + InsnMatcher(new InstructionMatcher(Rule, SymbolicName, NumOpsCheck)) {} static bool classof(const PredicateMatcher *P) { return P->getKind() == OPM_Instruction; @@ -4054,6 +4093,32 @@ if (ChildRec->getName() == "srcvalue") return Error::success(); + const bool ImmAllOnesV = ChildRec->getName() == "immAllOnesV"; + if (ImmAllOnesV || ChildRec->getName() == "immAllZerosV") { + auto MaybeInsnOperand = OM.addPredicate( + InsnMatcher.getRuleMatcher(), SrcChild->getName(), false); + InstructionOperandMatcher &InsnOperand = **MaybeInsnOperand; + + ValueTypeByHwMode VTy = ChildTypes.front().getValueTypeByHwMode(); + 
InsnOperand.getInsnMatcher().addPredicate( + &Target.getInstruction(RK.getDef("G_BUILD_VECTOR"))); + + // TODO: Handle both G_BUILD_VECTOR and G_BUILD_VECTOR_TRUNC. We could + // theoretically not emit any opcode check, but getOpcodeMatcher currently + // has to succeed. + OperandMatcher &OM = + InsnOperand.getInsnMatcher().addOperand(0, "", TempOpIdx); + if (auto Error = + OM.addTypeCheckPredicate(VTy, false /* OperandIsAPointer */)) + return failedImport(toString(std::move(Error)) + + " for result of Src pattern operator"); + + InsnOperand.getInsnMatcher().addPredicate( + ImmAllOnesV ? VectorSplatImmPredicateMatcher::AllOnes + : VectorSplatImmPredicateMatcher::AllZeros); + return Error::success(); + } + + return failedImport( "Src pattern child def is an unsupported tablegen class"); }