diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -260,6 +260,31 @@ LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy); +/// Represents a value which can be a Register or a constant. +/// +/// This is useful in situations where an instruction may have an interesting +/// register operand or interesting constant operand. For a concrete example, +/// \see getVectorSplat. +class RegOrConstant { + int64_t Cst; + Register Reg; + bool IsReg; + +public: + explicit RegOrConstant(Register Reg) : Reg(Reg), IsReg(true) {} + explicit RegOrConstant(int64_t Cst) : Cst(Cst), IsReg(false) {} + bool isReg() const { return IsReg; } + bool isCst() const { return !IsReg; } + Register getReg() const { + assert(isReg() && "Expected a register!"); + return Reg; + } + int64_t getCst() const { + assert(isCst() && "Expected a constant!"); + return Cst; + } +}; + /// \returns The splat index of a G_SHUFFLE_VECTOR \p MI when \p MI is a splat. /// If \p MI is not a splat, returns None. Optional<int> getSplatIndex(MachineInstr &MI); @@ -278,6 +303,28 @@ bool isBuildVectorAllOnes(const MachineInstr &MI, const MachineRegisterInfo &MRI); +/// \returns a value when \p MI is a vector splat. The splat can be either a +/// Register or a constant. +/// +/// Examples: +/// +/// \code +/// %reg = COPY $physreg +/// %reg_splat = G_BUILD_VECTOR %reg, %reg, ..., %reg +/// \endcode +/// +/// If called on the G_BUILD_VECTOR above, this will return a RegOrConstant +/// containing %reg. +/// +/// \code +/// %cst = G_CONSTANT iN 4 +/// %constant_splat = G_BUILD_VECTOR %cst, %cst, ..., %cst +/// \endcode +/// +/// In the above case, this will return a RegOrConstant containing 4. +Optional<RegOrConstant> getVectorSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI); + /// Returns true if given the TargetLowering's boolean contents information, /// the value \p Val contains a true value. 
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -828,6 +828,20 @@ return isBuildVectorConstantSplat(MI, MRI, -1); } +Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + unsigned Opc = MI.getOpcode(); + if (!isBuildVectorOp(Opc)) + return None; + if (auto Splat = getBuildVectorConstantSplat(MI, MRI)) + return RegOrConstant(*Splat); + auto Reg = MI.getOperand(1).getReg(); + if (any_of(make_range(MI.operands_begin() + 2, MI.operands_end()), + [&Reg](const MachineOperand &Op) { return Op.getReg() != Reg; })) + return None; + return RegOrConstant(Reg); +} + bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP) { switch (TLI.getBooleanContents(IsVector, IsFP)) { diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -135,13 +135,22 @@ (apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }]) >; +def build_vector_to_dup : GICombineRule< + (defs root:$root), + (match (wip_match_opcode G_BUILD_VECTOR):$root, + [{ return matchBuildVectorToDup(*${root}, MRI); }]), + (apply [{ return applyBuildVectorToDup(*${root}, MRI, B); }]) +>; + +def build_vector_lowering : GICombineGroup<[build_vector_to_dup]>; + // Post-legalization combines which should happen at all optimization levels. // (E.g. ones that facilitate matching for the selector) For example, matching // pseudos. def AArch64PostLegalizerLoweringHelper : GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper", [shuffle_vector_lowering, vashr_vlshr_imm, - icmp_lowering]> { + icmp_lowering, build_vector_lowering]> { let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule"; } diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt --- a/llvm/lib/Target/AArch64/CMakeLists.txt +++ b/llvm/lib/Target/AArch64/CMakeLists.txt @@ -29,6 +29,7 @@ add_llvm_target(AArch64CodeGen GISel/AArch64CallLowering.cpp + GISel/AArch64GlobalISelUtils.cpp GISel/AArch64InstructionSelector.cpp GISel/AArch64LegalizerInfo.cpp GISel/AArch64PreLegalizerCombiner.cpp diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h --- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h @@ -12,6 +12,9 @@ #ifndef LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H #define LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H +#include "llvm/ADT/Optional.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/Register.h" #include <cstdint> namespace llvm { @@ -23,6 +26,16 @@ return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0); } +/// \returns A value when \p MI is a vector splat of a Register or constant. +/// Checks for generic opcodes and AArch64-specific generic opcodes. +Optional<RegOrConstant> getAArch64VectorSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI); + +/// \returns A value when \p MI is a constant vector splat. +/// Checks for generic opcodes and AArch64-specific generic opcodes. 
+Optional<int64_t> getAArch64VectorSplatScalar(const MachineInstr &MI, + const MachineRegisterInfo &MRI); + } // namespace AArch64GISelUtils } // namespace llvm diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp @@ -0,0 +1,38 @@ +//===- AArch64GlobalISelUtils.cpp --------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file Implementations of AArch64-specific helper functions used in the +/// GlobalISel pipeline. +//===----------------------------------------------------------------------===// +#include "AArch64GlobalISelUtils.h" +#include "AArch64InstrInfo.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +Optional<RegOrConstant> +AArch64GISelUtils::getAArch64VectorSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + if (auto Splat = getVectorSplat(MI, MRI)) + return Splat; + if (MI.getOpcode() != AArch64::G_DUP) + return None; + Register Src = MI.getOperand(1).getReg(); + if (auto ValAndVReg = + getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) + return RegOrConstant(ValAndVReg->Value.getSExtValue()); + return RegOrConstant(Src); +} + +Optional<int64_t> AArch64GISelUtils::getAArch64VectorSplatScalar( + const MachineInstr &MI, const MachineRegisterInfo &MRI) { + auto Splat = getAArch64VectorSplat(MI, MRI); + if (!Splat || Splat->isReg()) + return None; + return Splat->getCst(); +} diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -11,6 +11,7 @@ /// \todo This should be generated by TableGen. //===----------------------------------------------------------------------===// +#include "AArch64GlobalISelUtils.h" #include "AArch64InstrInfo.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64RegisterBankInfo.h" @@ -24,7 +25,6 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" -#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" @@ -46,6 +47,7 @@ using namespace llvm; using namespace MIPatternMatch; +using namespace AArch64GISelUtils; namespace llvm { class BlockFrequencyInfo; @@ -145,6 +147,16 @@ Register EltReg, unsigned LaneIdx, const RegisterBank &RB, MachineIRBuilder &MIRBuilder) const; + + /// Emit a sequence of instructions representing a constant \p CV for a + /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.) + /// + /// \returns the last instruction in the sequence on success, and nullptr + /// otherwise. 
+ MachineInstr *emitConstantVector(Register Dst, Constant *CV, + MachineIRBuilder &MIRBuilder, + MachineRegisterInfo &MRI) const; + bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const; bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy, MachineRegisterInfo &MRI) const; @@ -1659,23 +1671,7 @@ assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand"); MachineInstr *OpMI = MRI.getVRegDef(Reg); assert(OpMI && "Expected to find a vreg def for vector shift operand"); - if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR) - return None; - - // Check all operands are identical immediates. - int64_t ImmVal = 0; - for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) { - auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI); - if (!VRegAndVal) - return None; - - if (Idx == 1) - ImmVal = VRegAndVal->Value.getSExtValue(); - if (ImmVal != VRegAndVal->Value.getSExtValue()) - return None; - } - - return ImmVal; + return getAArch64VectorSplatScalar(*OpMI, MRI); } /// Matches and returns the shift immediate value for a SHL instruction given @@ -1963,7 +1959,7 @@ auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg()); MRI.setType(I.getOperand(0).getReg(), DstTy.changeElementType(LLT::scalar(64))); - MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); + MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass); I.getOperand(1).setReg(NewSrc.getReg(0)); return true; } @@ -2125,6 +2121,25 @@ MachineRegisterInfo &MRI = MF.getRegInfo(); switch (I.getOpcode()) { + case AArch64::G_DUP: { + // Before selecting a DUP instruction, check if it is better selected as a + // MOV or load from a constant pool. + Register Src = I.getOperand(1).getReg(); + auto ValAndVReg = getConstantVRegValWithLookThrough(Src, MRI); + if (!ValAndVReg) + return false; + LLVMContext &Ctx = MF.getFunction().getContext(); + Register Dst = I.getOperand(0).getReg(); + auto *CV = ConstantDataVector::getSplat( + MRI.getType(Dst).getNumElements(), + ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()), + ValAndVReg->Value)); + MachineIRBuilder MIRBuilder(I); + if (!emitConstantVector(Dst, CV, MIRBuilder, MRI)) + return false; + I.eraseFromParent(); + return true; + } case TargetOpcode::G_BR: { // If the branch jumps to the fallthrough block, don't bother emitting it. 
// Only do this for -O0 for a good code size improvement, because when @@ -4811,6 +4826,44 @@ return true; } +MachineInstr * +AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV, + MachineIRBuilder &MIRBuilder, + MachineRegisterInfo &MRI) const { + LLT DstTy = MRI.getType(Dst); + unsigned DstSize = DstTy.getSizeInBits(); + if (CV->isNullValue()) { + if (DstSize == 128) { + auto Mov = + MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0); + constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); + return &*Mov; + } + + if (DstSize == 64) { + auto Mov = + MIRBuilder + .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {}) + .addImm(0); + auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {}) + .addReg(Mov.getReg(0), 0, AArch64::dsub); + RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI); + return &*Copy; + } + } + + auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder); + if (!CPLoad) { + LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!"); + return nullptr; + } + + auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0)); + RBI.constrainGenericRegister( + Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI); + return &*Copy; +} + bool AArch64InstructionSelector::tryOptConstantBuildVec( MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const { assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); @@ -4837,33 +4890,8 @@ } Constant *CV = ConstantVector::get(Csts); MachineIRBuilder MIB(I); - if (CV->isNullValue()) { - // Until the importer can support immAllZerosV in pattern leaf nodes, - // select a zero move manually here. - Register DstReg = I.getOperand(0).getReg(); - if (DstSize == 128) { - auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0); - I.eraseFromParent(); - return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); - } else if (DstSize == 64) { - auto Mov = - MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {}) - .addImm(0); - MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) - .addReg(Mov.getReg(0), 0, AArch64::dsub); - I.eraseFromParent(); - return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI); - } - } - auto *CPLoad = emitLoadFromConstantPool(CV, MIB); - if (!CPLoad) { - LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector"); + if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI)) return false; - } - MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0)); - RBI.constrainGenericRegister(I.getOperand(0).getReg(), - *MRI.getRegClass(CPLoad->getOperand(0).getReg()), - MRI); I.eraseFromParent(); return true; } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp @@ -471,7 +471,7 @@ int64_t &Cnt) { assert(Ty.isVector() && "vector shift count is not a vector type"); MachineInstr *MI = MRI.getVRegDef(Reg); - auto Cst = getBuildVectorConstantSplat(*MI, MRI); + auto Cst = getAArch64VectorSplatScalar(*MI, MRI); if (!Cst) return false; Cnt = *Cst; @@ -696,6 +696,29 @@ return true; } +static bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) { + assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); + auto Splat = getAArch64VectorSplat(MI, MRI); + if (!Splat) + return false; + if (Splat->isReg()) + return true; + // Later, during selection, we'll try to match imported 
patterns using + // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower + // G_BUILD_VECTORs which could match those patterns. + int64_t Cst = Splat->getCst(); + return (Cst != 0 && Cst != -1); +} + +static bool applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) { + B.setInstrAndDebugLoc(MI); + B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()}, + {MI.getOperand(1).getReg()}); + MI.eraseFromParent(); + return true; +} + #define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS #include "AArch64GenPostLegalizeGILowering.inc" #undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir @@ -0,0 +1,181 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=LOWER +# RUN: llc -mtriple aarch64 -O2 -start-before=aarch64-postlegalizer-lowering -stop-after=instruction-select -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=SELECT +... +--- +name: same_reg +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0 + ; LOWER-LABEL: name: same_reg + ; LOWER: liveins: $d0 + ; LOWER: %r:_(s8) = G_IMPLICIT_DEF + ; LOWER: %build_vector:_(<8 x s8>) = G_DUP %r(s8) + ; LOWER: $d0 = COPY %build_vector(<8 x s8>) + ; LOWER: RET_ReallyLR implicit $d0 + ; SELECT-LABEL: name: same_reg + ; SELECT: liveins: $d0 + ; SELECT: %r:gpr32 = IMPLICIT_DEF + ; SELECT: %build_vector:fpr64 = DUPv8i8gpr %r + ; SELECT: $d0 = COPY %build_vector + ; SELECT: RET_ReallyLR implicit $d0 + %r:_(s8) = G_IMPLICIT_DEF + %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r, %r, %r, %r, %r, %r, %r, %r + $d0 = COPY %build_vector(<8 x s8>) + RET_ReallyLR implicit $d0 + +... +--- +name: dont_combine_different_reg +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $w0, $w1 + ; LOWER-LABEL: name: dont_combine_different_reg + ; LOWER: liveins: $d0, $w0, $w1 + ; LOWER: %r:_(s32) = COPY $w0 + ; LOWER: %q:_(s32) = COPY $w1 + ; LOWER: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %r(s32), %q(s32) + ; LOWER: $d0 = COPY %build_vector(<2 x s32>) + ; LOWER: RET_ReallyLR implicit $d0 + ; SELECT-LABEL: name: dont_combine_different_reg + ; SELECT: liveins: $d0, $w0, $w1 + ; SELECT: %r:gpr32all = COPY $w0 + ; SELECT: %q:gpr32 = COPY $w1 + ; SELECT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF + ; SELECT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %r, %subreg.ssub + ; SELECT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, %q + ; SELECT: %build_vector:fpr64 = COPY [[INSvi32gpr]].dsub + ; SELECT: $d0 = COPY %build_vector + ; SELECT: RET_ReallyLR implicit $d0 + %r:_(s32) = COPY $w0 + %q:_(s32) = COPY $w1 + %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %r, %q + $d0 = COPY %build_vector(<2 x s32>) + RET_ReallyLR implicit $d0 + +... +--- +name: dont_combine_zero +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0 + ; Don't combine with 0. We want to avoid blocking immAllZerosV selection + ; patterns. 
+ + ; LOWER-LABEL: name: dont_combine_zero + ; LOWER: liveins: $d0 + ; LOWER: %r:_(s8) = G_CONSTANT i8 0 + ; LOWER: %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8) + ; LOWER: $d0 = COPY %build_vector(<8 x s8>) + ; LOWER: RET_ReallyLR implicit $d0 + ; SELECT-LABEL: name: dont_combine_zero + ; SELECT: liveins: $d0 + ; SELECT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0 + ; SELECT: %build_vector:fpr64 = COPY [[MOVIv2d_ns]].dsub + ; SELECT: $d0 = COPY %build_vector + ; SELECT: RET_ReallyLR implicit $d0 + %r:_(s8) = G_CONSTANT i8 0 + %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r, %r, %r, %r, %r, %r, %r, %r + $d0 = COPY %build_vector(<8 x s8>) + RET_ReallyLR implicit $d0 + +... +--- +name: dont_combine_all_ones +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0 + ; Don't combine with -1. We want to avoid blocking immAllOnesV selection + ; patterns. + + ; LOWER-LABEL: name: dont_combine_all_ones + ; LOWER: liveins: $d0 + ; LOWER: %r:_(s8) = G_CONSTANT i8 -1 + ; LOWER: %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8) + ; LOWER: $d0 = COPY %build_vector(<8 x s8>) + ; LOWER: RET_ReallyLR implicit $d0 + ; SELECT-LABEL: name: dont_combine_all_ones + ; SELECT: liveins: $d0 + ; SELECT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0 + ; SELECT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 + ; SELECT: $d0 = COPY [[LDRDui]] + ; SELECT: RET_ReallyLR implicit $d0 + %r:_(s8) = G_CONSTANT i8 -1 + %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r, %r, %r, %r, %r, %r, %r, %r + $d0 = COPY %build_vector(<8 x s8>) + RET_ReallyLR implicit $d0 + +... +--- +name: all_zeros_pat_example +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0 + ; We should get a NEGv2i32 here. + + ; LOWER-LABEL: name: all_zeros_pat_example + ; LOWER: liveins: $d0 + ; LOWER: %v:_(<2 x s32>) = COPY $d0 + ; LOWER: %cst:_(s32) = G_CONSTANT i32 0 + ; LOWER: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst(s32), %cst(s32) + ; LOWER: %sub:_(<2 x s32>) = G_SUB %build_vector, %v + ; LOWER: $d0 = COPY %sub(<2 x s32>) + ; LOWER: RET_ReallyLR implicit $d0 + ; SELECT-LABEL: name: all_zeros_pat_example + ; SELECT: liveins: $d0 + ; SELECT: %v:fpr64 = COPY $d0 + ; SELECT: %sub:fpr64 = NEGv2i32 %v + ; SELECT: $d0 = COPY %sub + ; SELECT: RET_ReallyLR implicit $d0 + %v:_(<2 x s32>) = COPY $d0 + %cst:_(s32) = G_CONSTANT i32 0 + %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst, %cst + %sub:_(<2 x s32>) = G_SUB %build_vector, %v + $d0 = COPY %sub(<2 x s32>) + RET_ReallyLR implicit $d0 + +... +--- +name: all_ones_pat_example +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; We should get a BICv8i8 here. 
+ + ; LOWER-LABEL: name: all_ones_pat_example + ; LOWER: liveins: $d0, $d1 + ; LOWER: %v0:_(<2 x s32>) = COPY $d0 + ; LOWER: %v1:_(<2 x s32>) = COPY $d1 + ; LOWER: %cst:_(s32) = G_CONSTANT i32 -1 + ; LOWER: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst(s32), %cst(s32) + ; LOWER: %xor:_(<2 x s32>) = G_XOR %v0, %build_vector + ; LOWER: %and:_(<2 x s32>) = G_AND %v1, %xor + ; LOWER: $d0 = COPY %and(<2 x s32>) + ; LOWER: RET_ReallyLR implicit $d0 + ; SELECT-LABEL: name: all_ones_pat_example + ; SELECT: liveins: $d0, $d1 + ; SELECT: %v0:fpr64 = COPY $d0 + ; SELECT: %v1:fpr64 = COPY $d1 + ; SELECT: %and:fpr64 = BICv8i8 %v1, %v0 + ; SELECT: $d0 = COPY %and + ; SELECT: RET_ReallyLR implicit $d0 + %v0:_(<2 x s32>) = COPY $d0 + %v1:_(<2 x s32>) = COPY $d1 + %cst:_(s32) = G_CONSTANT i32 -1 + %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst, %cst + %xor:_(<2 x s32>) = G_XOR %v0, %build_vector + %and:_(<2 x s32>) = G_AND %v1, %xor + $d0 = COPY %and(<2 x s32>) + RET_ReallyLR implicit $d0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir @@ -84,8 +84,8 @@ ; CHECK: liveins: $d0, $d1 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; CHECK: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[BUILD_VECTOR]](<4 x s32>) + ; CHECK: [[DUP:%[0-9]+]]:_(<4 x s32>) = G_DUP [[C]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[DUP]](<4 x s32>) ; CHECK: $q0 = COPY [[LSHR]](<4 x s32>) ; CHECK: RET_ReallyLR implicit $q0 %0:_(<4 x s32>) = COPY $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir @@ -379,3 +379,61 @@ RET_ReallyLR implicit $q0 ... +--- +name: cst_v4s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: cst_v4s32 + ; CHECK: liveins: $w0 + ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0 + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 + ; CHECK: $q0 = COPY [[LDRQui]] + ; CHECK: RET_ReallyLR implicit $q0 + %cst:gpr(s32) = G_CONSTANT i32 3 + %dup:fpr(<4 x s32>) = G_DUP %cst(s32) + $q0 = COPY %dup(<4 x s32>) + RET_ReallyLR implicit $q0 + +... +--- +name: cst_v8s8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: cst_v8s8 + ; CHECK: liveins: $w0 + ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0 + ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 + ; CHECK: $d0 = COPY [[LDRDui]] + ; CHECK: RET_ReallyLR implicit $d0 + %cst:gpr(s8) = G_CONSTANT i8 3 + %dup:fpr(<8 x s8>) = G_DUP %cst(s8) + $d0 = COPY %dup(<8 x s8>) + RET_ReallyLR implicit $d0 +... 
+--- +name: cst_v2p0 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: cst_v2p0 + ; CHECK: liveins: $w0 + ; CHECK: %cst:gpr64 = MOVi64imm 3 + ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0 + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 + ; CHECK: $q0 = COPY [[LDRQui]] + ; CHECK: RET_ReallyLR implicit $q0 + %cst:gpr(p0) = G_CONSTANT i64 3 + %dup:fpr(<2 x p0>) = G_DUP %cst(p0) + $q0 = COPY %dup(<2 x p0>) + RET_ReallyLR implicit $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir @@ -572,3 +572,38 @@ $q0 = COPY %2(<16 x s8>) RET_ReallyLR implicit $q0 ... +--- +name: shl_v2i32_imm_dup +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: gpr } + - { id: 3, class: fpr } +liveins: + - { reg: '$d0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1: + liveins: $d0 + + ; Should still be able to select immediate forms using a G_DUP from a + ; constant. + + ; CHECK-LABEL: name: shl_v2i32_imm_dup + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[SHLv2i32_shift:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY]], 24 + ; CHECK: $d0 = COPY [[SHLv2i32_shift]] + ; CHECK: RET_ReallyLR implicit $d0 + %0:fpr(<2 x s32>) = COPY $d0 + %2:gpr(s32) = G_CONSTANT i32 24 + %1:fpr(<2 x s32>) = G_DUP %2(s32) + %3:fpr(<2 x s32>) = G_SHL %0, %1(<2 x s32>) + $d0 = COPY %3(<2 x s32>) + RET_ReallyLR implicit $d0
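
Usage sketch (illustrative, not part of the diff): the snippet below shows how the new splat helpers are intended to be consumed by a combine, mirroring matchBuildVectorToDup in AArch64PostLegalizerLowering.cpp above. The function name matchSplatExample is hypothetical; getAArch64VectorSplat, RegOrConstant, and the 0/-1 screening come from the patch itself.

#include "AArch64GlobalISelUtils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Hypothetical combine matcher: succeed only when MI is a splat that is
// profitable to rewrite as AArch64::G_DUP.
static bool matchSplatExample(MachineInstr &MI, MachineRegisterInfo &MRI) {
  // Handles both generic G_BUILD_VECTOR splats and AArch64::G_DUP.
  auto Splat = AArch64GISelUtils::getAArch64VectorSplat(MI, MRI);
  if (!Splat)
    return false; // Not a splat of a single register or constant.
  if (Splat->isReg())
    return true; // Every lane is the same vreg; a G_DUP of that vreg is ideal.
  // Every lane is the same constant. Leave all-zeros/all-ones splats as
  // G_BUILD_VECTOR so the imported immAllZerosV/immAllOnesV patterns still
  // apply during selection.
  int64_t Cst = Splat->getCst();
  return Cst != 0 && Cst != -1;
}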