diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -89,6 +89,29 @@
   void applyElideBrByInvertingCond(MachineInstr &MI);
   bool tryElideBrByInvertingCond(MachineInstr &MI);
 
+  /// If \p MI is G_CONCAT_VECTORS, try to combine it.
+  /// Returns true if MI changed.
+  /// Right now, we support:
+  /// - concat_vectors(undef, undef) => undef
+  /// - concat_vectors(build_vector(A, B), build_vector(C, D)) =>
+  ///   build_vector(A, B, C, D)
+  ///
+  /// \pre MI.getOpcode() == G_CONCAT_VECTORS.
+  bool tryCombineConcatVectors(MachineInstr &MI);
+  /// Check if the G_CONCAT_VECTORS \p MI is undef or if it
+  /// can be flattened into a build_vector.
+  /// In the first case \p IsUndef will be true.
+  /// In the second case \p Ops will contain the operands needed
+  /// to produce the flattened build_vector.
+  ///
+  /// \pre MI.getOpcode() == G_CONCAT_VECTORS.
+  bool matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
+                                 SmallVectorImpl<Register> &Ops);
+  /// Replace \p MI with a flattened build_vector with \p Ops or an
+  /// implicit_def if \p IsUndef is true.
+  void applyCombineConcatVectors(MachineInstr &MI, bool IsUndef,
+                                 const ArrayRef<Register> Ops);
+
   /// Optimize memcpy intrinsics et al, e.g. constant len calls.
   /// /p MaxLen if non-zero specifies the max length of a mem libcall to inline.
   ///
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -89,6 +89,90 @@
   replaceRegWith(MRI, DstReg, SrcReg);
 }
 
+bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
+  bool IsUndef = false;
+  SmallVector<Register, 4> Ops;
+  if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
+    applyCombineConcatVectors(MI, IsUndef, Ops);
+    return true;
+  }
+  return false;
+}
+
+bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
+                                               SmallVectorImpl<Register> &Ops) {
+  assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
+         "Invalid instruction");
+  IsUndef = true;
+  MachineInstr *Undef = nullptr;
+
+  // Walk over all the operands of concat vectors and check if they are
+  // build_vector themselves or undef.
+  // Then collect their operands in Ops.
+  for (const MachineOperand &MO : MI.operands()) {
+    // Skip the instruction definition.
+    if (MO.isDef())
+      continue;
+    Register Reg = MO.getReg();
+    MachineInstr *Def = MRI.getVRegDef(Reg);
+    assert(Def && "Operand not defined");
+    switch (Def->getOpcode()) {
+    case TargetOpcode::G_BUILD_VECTOR:
+      IsUndef = false;
+      // Remember the operands of the build_vector to fold
+      // them into the yet-to-build flattened concat vectors.
+      for (const MachineOperand &BuildVecMO : Def->operands()) {
+        // Skip the definition.
+        if (BuildVecMO.isDef())
+          continue;
+        Ops.push_back(BuildVecMO.getReg());
+      }
+      break;
+    case TargetOpcode::G_IMPLICIT_DEF: {
+      LLT OpType = MRI.getType(Reg);
+      // Keep one undef value for all the undef operands.
+      if (!Undef) {
+        Builder.setInsertPt(*MI.getParent(), MI);
+        Undef = Builder.buildUndef(OpType.getScalarType());
+      }
+      LLT UndefType = MRI.getType(Undef->getOperand(0).getReg());
+      assert(UndefType == OpType.getScalarType() &&
+             "All undefs should have the same type");
+      // Break the undef vector into as many scalar elements as needed
+      // for the flattening.
+      for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
+           EltIdx != EltEnd; ++EltIdx)
+        Ops.push_back(Undef->getOperand(0).getReg());
+      break;
+    }
+    default:
+      return false;
+    }
+  }
+  return true;
+}
+void CombinerHelper::applyCombineConcatVectors(
+    MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
+  // We determined that the concat_vectors can be flattened.
+  // Generate the flattened build_vector.
+  Register DstReg = MI.getOperand(0).getReg();
+  Builder.setInsertPt(*MI.getParent(), MI);
+  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
+
+  // Note: IsUndef is sort of redundant. We could have determined it by
+  // checking that all Ops are undef. Alternatively, we could have
+  // generated a build_vector of undefs and relied on another combine to
+  // clean that up. For now, given we already gather this information
+  // in tryCombineConcatVectors, just save compile time and issue the
+  // right thing.
+  if (IsUndef)
+    Builder.buildUndef(NewDstReg);
+  else
+    Builder.buildBuildVector(NewDstReg, Ops);
+  MI.eraseFromParent();
+  replaceRegWith(MRI, DstReg, NewDstReg);
+}
+
 namespace {
 
 /// Select a preference between two uses. CurrentUse is the current preference
diff --git a/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
--- a/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
@@ -58,6 +58,8 @@
   CombinerHelper Helper(Observer, B, KB, MDT);
 
   switch (MI.getOpcode()) {
+  case TargetOpcode::G_CONCAT_VECTORS:
+    return Helper.tryCombineConcatVectors(MI);
   case TargetOpcode::G_LOAD:
   case TargetOpcode::G_SEXTLOAD:
   case TargetOpcode::G_ZEXTLOAD: {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-concat-vectors.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-concat-vectors.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-concat-vectors.mir
@@ -0,0 +1,141 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-apple-ios -run-pass=aarch64-prelegalizer-combiner %s -o - | FileCheck %s
+
+# Check that we canonicalize concat_vectors(build_vector, build_vector)
+# into build_vector.
+---
+name: concat_to_build_vector
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+
+    ; CHECK-LABEL: name: concat_to_build_vector
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64)
+    ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %3:_(s64) = COPY $x3
+    %4:_(<2 x s64>) = G_BUILD_VECTOR %0(s64), %1
+    %5:_(<2 x s64>) = G_BUILD_VECTOR %2(s64), %3
+    %6:_(<4 x s64>) = G_CONCAT_VECTORS %4(<2 x s64>), %5
+    RET_ReallyLR implicit %6
+...
+# Same test as concat_to_build_vector but with pointer types.
+---
+name: concat_to_build_vector_ptr
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+
+    ; CHECK-LABEL: name: concat_to_build_vector_ptr
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+    ; CHECK: [[COPY3:%[0-9]+]]:_(p0) = COPY $x3
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0), [[COPY2]](p0), [[COPY3]](p0)
+    ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x p0>)
+    %0:_(p0) = COPY $x0
+    %1:_(p0) = COPY $x1
+    %2:_(p0) = COPY $x2
+    %3:_(p0) = COPY $x3
+    %4:_(<2 x p0>) = G_BUILD_VECTOR %0(p0), %1
+    %5:_(<2 x p0>) = G_BUILD_VECTOR %2(p0), %3
+    %6:_(<4 x p0>) = G_CONCAT_VECTORS %4(<2 x p0>), %5
+    RET_ReallyLR implicit %6
+...
+# Check that we canonicalize concat_vectors(undef, undef) into undef.
+---
+name: concat_to_undef
+tracksRegLiveness: true
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: concat_to_undef
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+    ; CHECK: RET_ReallyLR implicit [[DEF]](<4 x s64>)
+    %4:_(<2 x s64>) = G_IMPLICIT_DEF
+    %5:_(<2 x s64>) = G_IMPLICIT_DEF
+    %6:_(<4 x s64>) = G_CONCAT_VECTORS %4(<2 x s64>), %5
+    RET_ReallyLR implicit %6
+...
+# Check that when combining concat_vectors(build_vector, undef) into
+# build_vector, we correctly break the undef vector into a sequence
+# of undef scalars.
+---
+name: concat_to_build_vector_with_undef
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: concat_to_build_vector_with_undef
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[DEF]](s64), [[DEF]](s64)
+    ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %4:_(<2 x s64>) = G_BUILD_VECTOR %0(s64), %1
+    %5:_(<2 x s64>) = G_IMPLICIT_DEF
+    %6:_(<4 x s64>) = G_CONCAT_VECTORS %4(<2 x s64>), %5
+    RET_ReallyLR implicit %6
+...
+# Same as concat_to_build_vector_with_undef but with pointer types.
+---
+name: concat_to_build_vector_with_undef_ptr
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: concat_to_build_vector_with_undef_ptr
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0), [[DEF]](p0), [[DEF]](p0)
+    ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x p0>)
+    %0:_(p0) = COPY $x0
+    %1:_(p0) = COPY $x1
+    %4:_(<2 x p0>) = G_BUILD_VECTOR %0(p0), %1
+    %5:_(<2 x p0>) = G_IMPLICIT_DEF
+    %6:_(<4 x p0>) = G_CONCAT_VECTORS %4(<2 x p0>), %5
+    RET_ReallyLR implicit %6
+...
+# Check that we keep a concat_vectors as soon as one of the operands is
+# not undef or build_vector. I.e., we cannot flatten the concat_vectors.
+---
+name: concat_to_build_vector_negative_test
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: concat_to_build_vector_negative_test
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s32)
+    ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[IVEC]](<2 x s64>), [[DEF1]](<2 x s64>)
+    ; CHECK: RET_ReallyLR implicit [[CONCAT_VECTORS]](<4 x s64>)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(<2 x s64>) = G_IMPLICIT_DEF
+    %3:_(s32) = G_CONSTANT i32 1
+    %4:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32)
+    %5:_(<2 x s64>) = G_IMPLICIT_DEF
+    %6:_(<4 x s64>) = G_CONCAT_VECTORS %4(<2 x s64>), %5
+    RET_ReallyLR implicit %6
+...
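
Note (not part of the patch): other targets can opt into the new combine the same way the AArch64 hook above does. Below is a minimal sketch of such a dispatch; "MyTarget", the free-function shape, and the KB/MDT plumbing are illustrative stand-ins modeled on AArch64PreLegalizerCombinerInfo::combine, while CombinerHelper and tryCombineConcatVectors come from this patch.

// Sketch only: dispatching G_CONCAT_VECTORS to the new CombinerHelper entry
// point from a hypothetical target's pre-legalizer combiner.
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

// Hypothetical combine hook; in a real target, KB and MDT would be supplied
// by the CombinerInfo, as in the AArch64 hunk above.
static bool combineMyTargetPreLegalizer(GISelChangeObserver &Observer,
                                        MachineInstr &MI, MachineIRBuilder &B,
                                        GISelKnownBits *KB,
                                        MachineDominatorTree *MDT) {
  CombinerHelper Helper(Observer, B, KB, MDT);

  switch (MI.getOpcode()) {
  case TargetOpcode::G_CONCAT_VECTORS:
    // Flattens concat_vectors of build_vector/undef operands into a single
    // G_BUILD_VECTOR, or folds an all-undef concat into G_IMPLICIT_DEF.
    // Returns true if MI was replaced.
    return Helper.tryCombineConcatVectors(MI);
  default:
    return false;
  }
}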