Index: llvm/trunk/include/llvm/Target/GlobalISel/SelectionDAGCompat.td =================================================================== --- llvm/trunk/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ llvm/trunk/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -88,6 +88,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; Index: llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -18,6 +18,7 @@ #include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" +#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" @@ -91,11 +92,15 @@ SmallVectorImpl &Idxs) const; bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const; unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const; MachineInstr *emitLoadFromConstantPool(Constant *CPVal, MachineIRBuilder &MIRBuilder) const; - MachineInstr *emitVectorConcat(unsigned Op1, unsigned Op2, + + // Emit a vector concat operation. + MachineInstr *emitVectorConcat(Optional Dst, unsigned Op1, + unsigned Op2, MachineIRBuilder &MIRBuilder) const; ComplexRendererFns selectArithImmed(MachineOperand &Root) const; @@ -1726,6 +1731,8 @@ return selectExtractElt(I, MRI); case TargetOpcode::G_INSERT_VECTOR_ELT: return selectInsertElt(I, MRI); + case TargetOpcode::G_CONCAT_VECTORS: + return selectConcatVectors(I, MRI); } return false; @@ -2067,6 +2074,21 @@ return true; } +bool AArch64InstructionSelector::selectConcatVectors( + MachineInstr &I, MachineRegisterInfo &MRI) const { + assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && + "Unexpected opcode"); + unsigned Dst = I.getOperand(0).getReg(); + unsigned Op1 = I.getOperand(1).getReg(); + unsigned Op2 = I.getOperand(2).getReg(); + MachineIRBuilder MIRBuilder(I); + MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder); + if (!ConcatMI) + return false; + I.eraseFromParent(); + return true; +} + void AArch64InstructionSelector::collectShuffleMaskIndices( MachineInstr &I, MachineRegisterInfo &MRI, SmallVectorImpl &Idxs) const { @@ -2169,7 +2191,8 @@ } MachineInstr *AArch64InstructionSelector::emitVectorConcat( - unsigned Op1, unsigned Op2, MachineIRBuilder &MIRBuilder) const { + Optional Dst, unsigned Op1, unsigned Op2, + MachineIRBuilder &MIRBuilder) const { // We implement a vector concat by: // 1. Use scalar_to_vector to insert the lower vector into the larger dest // 2. Insert the upper vector into the destination's upper element @@ -2215,13 +2238,14 @@ std::tie(InsertOpc, InsSubRegIdx) = getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits()); + if (!Dst) + Dst = MRI.createVirtualRegister(DstRC); auto InsElt = MIRBuilder - .buildInstr(InsertOpc, {DstRC}, {WidenedOp1->getOperand(0).getReg()}) + .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()}) .addImm(1) /* Lane index */ .addUse(WidenedOp2->getOperand(0).getReg()) .addImm(0); - constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); return &*InsElt; } @@ -2276,7 +2300,7 @@ if (DstTy.getSizeInBits() != 128) { assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty"); // This case can be done with TBL1. - MachineInstr *Concat = emitVectorConcat(Src1Reg, Src2Reg, MIRBuilder); + MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder); if (!Concat) { LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1"); return false; Index: llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -500,6 +500,9 @@ .clampNumElements(0, v4s32, v4s32) .clampNumElements(0, v2s64, v2s64); + getActionDefinitionsBuilder(G_CONCAT_VECTORS) + .legalFor({{v4s32, v2s32}, {v8s16, v4s16}}); + computeTables(); verify(*ST.getInstrInfo()); } Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-concat-vectors.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-concat-vectors.mir +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-concat-vectors.mir @@ -0,0 +1,37 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer %s -global-isel-abort=1 -o - | FileCheck %s + +--- +name: legal_v4s32_v2s32 +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: legal_v4s32_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>) + ; CHECK: $q0 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; CHECK: RET_ReallyLR + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s32>) = COPY $d1 + %2:_(<4 x s32>) = G_CONCAT_VECTORS %0(<2 x s32>), %1(<2 x s32>) + $q0 = COPY %2(<4 x s32>) + RET_ReallyLR +... +--- +name: legal_v8s16_v4s16 +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: legal_v8s16_v4s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>) + ; CHECK: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>) + ; CHECK: RET_ReallyLR + %0:_(<4 x s16>) = COPY $d0 + %1:_(<4 x s16>) = COPY $d1 + %2:_(<8 x s16>) = G_CONCAT_VECTORS %0(<4 x s16>), %1(<4 x s16>) + $q0 = COPY %2(<8 x s16>) + RET_ReallyLR +... Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -76,7 +76,7 @@ # DEBUG: .. type index coverage check SKIPPED: no rules defined # # DEBUG-NEXT: G_CONCAT_VECTORS (opcode {{[0-9]+}}): 2 type indices -# DEBUG: .. type index coverage check SKIPPED: no rules defined +# DEBUG: .. the first uncovered type index: 2, OK # # DEBUG-NEXT: G_PTRTOINT (opcode {{[0-9]+}}): 2 type indices # DEBUG: .. the first uncovered type index: 2, OK Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-concat-vectors.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-concat-vectors.mir +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-concat-vectors.mir @@ -0,0 +1,63 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +... +--- +name: legal_v4s32_v2s32 +alignment: 2 +legalized: true +regBankSelected: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + ; CHECK-LABEL: name: legal_v4s32_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub + ; CHECK: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.dsub + ; CHECK: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG]], 1, [[INSERT_SUBREG1]], 0 + ; CHECK: $q0 = COPY [[INSvi64lane]] + ; CHECK: RET_ReallyLR + %0:fpr(<2 x s32>) = COPY $d0 + %1:fpr(<2 x s32>) = COPY $d1 + %2:fpr(<4 x s32>) = G_CONCAT_VECTORS %0(<2 x s32>), %1(<2 x s32>) + $q0 = COPY %2(<4 x s32>) + RET_ReallyLR + +... +--- +name: legal_v8s16_v4s16 +alignment: 2 +legalized: true +regBankSelected: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0: + ; CHECK-LABEL: name: legal_v8s16_v4s16 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub + ; CHECK: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF + ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.dsub + ; CHECK: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG]], 1, [[INSERT_SUBREG1]], 0 + ; CHECK: $q0 = COPY [[INSvi64lane]] + ; CHECK: RET_ReallyLR + %0:fpr(<4 x s16>) = COPY $d0 + %1:fpr(<4 x s16>) = COPY $d1 + %2:fpr(<8 x s16>) = G_CONCAT_VECTORS %0(<4 x s16>), %1(<4 x s16>) + $q0 = COPY %2(<8 x s16>) + RET_ReallyLR + +...