diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -588,6 +588,58 @@
   }
 }
 
+/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
+/// Helper function for functions like createDTuple and createQTuple.
+///
+/// \p RegClassIDs - The list of register class IDs available for some tuple of
+/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
+/// expected to contain between 2 and 4 tuple classes.
+///
+/// \p SubRegs - The list of subregister classes associated with each register
+/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
+/// subregister class. The index of each subregister class is expected to
+/// correspond with the index of each register class.
+///
+/// \returns Either the destination register of the REG_SEQUENCE instruction
+/// that was created, or the 0th element of \p Regs if \p Regs contains a
+/// single element.
+static Register createTuple(ArrayRef<Register> Regs,
+                            const unsigned RegClassIDs[],
+                            const unsigned SubRegs[], MachineIRBuilder &MIB) {
+  unsigned NumRegs = Regs.size();
+  if (NumRegs == 1)
+    return Regs[0];
+  assert(NumRegs >= 2 && NumRegs <= 4 &&
+         "Only support between two and 4 registers in a tuple!");
+  const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
+  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
+  auto RegSequence =
+      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
+  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
+    RegSequence.addUse(Regs[I]);
+    RegSequence.addImm(SubRegs[I]);
+  }
+  return RegSequence.getReg(0);
+}
+
+/// Create a tuple of D-registers using the registers in \p Regs.
+static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
+  static const unsigned RegClassIDs[] = {
+      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
+  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
+                                     AArch64::dsub2, AArch64::dsub3};
+  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
+}
+
+/// Create a tuple of Q-registers using the registers in \p Regs.
+static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
+  static const unsigned RegClassIDs[] = {
+      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
+  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
+                                     AArch64::qsub2, AArch64::qsub3};
+  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
+}
+
 static Optional<int64_t> getImmedFromMO(const MachineOperand &Root) {
   auto &MI = *Root.getParent();
   auto &MBB = *MI.getParent();
@@ -4700,15 +4752,10 @@
 
   // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
   // Q registers for regalloc.
-  auto RegSeq = MIB.buildInstr(TargetOpcode::REG_SEQUENCE,
-                               {&AArch64::QQRegClass}, {Src1Reg})
-                    .addImm(AArch64::qsub0)
-                    .addUse(Src2Reg)
-                    .addImm(AArch64::qsub1);
-
+  SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
+  auto RegSeq = createQTuple(Regs, MIB);
   auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
                              {RegSeq, IndexLoad->getOperand(0)});
-  constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
   constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
   I.eraseFromParent();
   return true;
@@ -5007,6 +5054,43 @@
     MIB.buildInstr(AArch64::BRK, {}, {})
        .addImm(I.getOperand(1).getImm() | ('U' << 8));
     break;
+  case Intrinsic::aarch64_neon_st2: {
+    Register Src1 = I.getOperand(1).getReg();
+    Register Src2 = I.getOperand(2).getReg();
+    Register Ptr = I.getOperand(3).getReg();
+    LLT Ty = MRI.getType(Src1);
+    const LLT S8 = LLT::scalar(8);
+    const LLT S16 = LLT::scalar(16);
+    const LLT S32 = LLT::scalar(32);
+    const LLT S64 = LLT::scalar(64);
+    const LLT P0 = LLT::pointer(0, 64);
+    unsigned Opc;
+    if (Ty == LLT::fixed_vector(8, S8))
+      Opc = AArch64::ST2Twov8b;
+    else if (Ty == LLT::fixed_vector(16, S8))
+      Opc = AArch64::ST2Twov16b;
+    else if (Ty == LLT::fixed_vector(4, S16))
+      Opc = AArch64::ST2Twov4h;
+    else if (Ty == LLT::fixed_vector(8, S16))
+      Opc = AArch64::ST2Twov8h;
+    else if (Ty == LLT::fixed_vector(2, S32))
+      Opc = AArch64::ST2Twov2s;
+    else if (Ty == LLT::fixed_vector(4, S32))
+      Opc = AArch64::ST2Twov4s;
+    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+      Opc = AArch64::ST2Twov2d;
+    else if (Ty == S64 || Ty == P0)
+      Opc = AArch64::ST1Twov1d;
+    else
+      llvm_unreachable("Unexpected type for st2!");
+    SmallVector<Register, 2> Regs = {Src1, Src2};
+    Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
+                                               : createDTuple(Regs, MIB);
+    auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
+    Store.cloneMemRefs(I);
+    constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
+    break;
+  }
   }
 
   I.eraseFromParent();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-st2.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-st2.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-st2.mir
@@ -0,0 +1,247 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: v8i8_ST2Twov8b
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1, $x0
+
+    ; CHECK-LABEL: name: v8i8_ST2Twov8b
+    ; CHECK: liveins: $d0, $d1, $x0
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:fpr64 = COPY $d0
+    ; CHECK: %src2:fpr64 = COPY $d1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:dd = REG_SEQUENCE %src1, %subreg.dsub0, %src2, %subreg.dsub1
+    ; CHECK: ST2Twov8b [[REG_SEQUENCE]], %ptr :: (store (<2 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:fpr(<8 x s8>) = COPY $d0
+    %src2:fpr(<8 x s8>) = COPY $d1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<8 x s8>), %src2(<8 x s8>), %ptr(p0) :: (store (<2 x s64>))
+    RET_ReallyLR
+
+...
+---
+name: v16i8_ST2Twov16b
+
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1, $x0
+
+    ; CHECK-LABEL: name: v16i8_ST2Twov16b
+    ; CHECK: liveins: $q0, $q1, $x0
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:fpr128 = COPY $q0
+    ; CHECK: %src2:fpr128 = COPY $q1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE %src1, %subreg.qsub0, %src2, %subreg.qsub1
+    ; CHECK: ST2Twov16b [[REG_SEQUENCE]], %ptr :: (store (<4 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:fpr(<16 x s8>) = COPY $q0
+    %src2:fpr(<16 x s8>) = COPY $q1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<16 x s8>), %src2(<16 x s8>), %ptr(p0) :: (store (<4 x s64>))
+    RET_ReallyLR
+
+...
+---
+name: v4i16_ST2Twov4h
+
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1, $x0
+
+    ; CHECK-LABEL: name: v4i16_ST2Twov4h
+    ; CHECK: liveins: $d0, $d1, $x0
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:fpr64 = COPY $d0
+    ; CHECK: %src2:fpr64 = COPY $d1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:dd = REG_SEQUENCE %src1, %subreg.dsub0, %src2, %subreg.dsub1
+    ; CHECK: ST2Twov4h [[REG_SEQUENCE]], %ptr :: (store (<2 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:fpr(<4 x s16>) = COPY $d0
+    %src2:fpr(<4 x s16>) = COPY $d1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<4 x s16>), %src2(<4 x s16>), %ptr(p0) :: (store (<2 x s64>))
+    RET_ReallyLR
+
+...
+---
+name: v8i16_ST2Twov8h
+
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1, $x0
+
+    ; CHECK-LABEL: name: v8i16_ST2Twov8h
+    ; CHECK: liveins: $q0, $q1, $x0
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:fpr128 = COPY $q0
+    ; CHECK: %src2:fpr128 = COPY $q1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE %src1, %subreg.qsub0, %src2, %subreg.qsub1
+    ; CHECK: ST2Twov8h [[REG_SEQUENCE]], %ptr :: (store (<4 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:fpr(<8 x s16>) = COPY $q0
+    %src2:fpr(<8 x s16>) = COPY $q1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<8 x s16>), %src2(<8 x s16>), %ptr(p0) :: (store (<4 x s64>))
+    RET_ReallyLR
+
+...
+---
+name: v2i32_ST2Twov2s
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1, $x0
+
+    ; CHECK-LABEL: name: v2i32_ST2Twov2s
+    ; CHECK: liveins: $d0, $d1, $x0
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:fpr64 = COPY $d0
+    ; CHECK: %src2:fpr64 = COPY $d1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:dd = REG_SEQUENCE %src1, %subreg.dsub0, %src2, %subreg.dsub1
+    ; CHECK: ST2Twov2s [[REG_SEQUENCE]], %ptr :: (store (<2 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:fpr(<2 x s32>) = COPY $d0
+    %src2:fpr(<2 x s32>) = COPY $d1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<2 x s32>), %src2(<2 x s32>), %ptr(p0) :: (store (<2 x s64>))
+    RET_ReallyLR
+
+...
+---
+name: v4i32_ST2Twov4s
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1, $x0
+
+    ; CHECK-LABEL: name: v4i32_ST2Twov4s
+    ; CHECK: liveins: $q0, $q1, $x0
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:fpr128 = COPY $q0
+    ; CHECK: %src2:fpr128 = COPY $q1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE %src1, %subreg.qsub0, %src2, %subreg.qsub1
+    ; CHECK: ST2Twov4s [[REG_SEQUENCE]], %ptr :: (store (<4 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:fpr(<4 x s32>) = COPY $q0
+    %src2:fpr(<4 x s32>) = COPY $q1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<4 x s32>), %src2(<4 x s32>), %ptr(p0) :: (store (<4 x s64>))
+    RET_ReallyLR
+
+...
+---
+name: v2i64_ST2Twov2d_s64_elts
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1, $x0
+
+    ; CHECK-LABEL: name: v2i64_ST2Twov2d_s64_elts
+    ; CHECK: liveins: $q0, $q1, $x0
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:fpr128 = COPY $q0
+    ; CHECK: %src2:fpr128 = COPY $q1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE %src1, %subreg.qsub0, %src2, %subreg.qsub1
+    ; CHECK: ST2Twov2d [[REG_SEQUENCE]], %ptr :: (store (<4 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:fpr(<2 x s64>) = COPY $q0
+    %src2:fpr(<2 x s64>) = COPY $q1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<2 x s64>), %src2(<2 x s64>), %ptr(p0) :: (store (<4 x s64>))
+    RET_ReallyLR
+
+...
+---
+name: v2i64_ST2Twov2d_s64_p0_elts
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1, $x0
+
+    ; CHECK-LABEL: name: v2i64_ST2Twov2d_s64_p0_elts
+    ; CHECK: liveins: $q0, $q1, $x0
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:fpr128 = COPY $q0
+    ; CHECK: %src2:fpr128 = COPY $q1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE %src1, %subreg.qsub0, %src2, %subreg.qsub1
+    ; CHECK: ST2Twov2d [[REG_SEQUENCE]], %ptr :: (store (<4 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:fpr(<2 x p0>) = COPY $q0
+    %src2:fpr(<2 x p0>) = COPY $q1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<2 x p0>), %src2(<2 x p0>), %ptr(p0) :: (store (<4 x s64>))
+    RET_ReallyLR
+
+...
+---
+name: v1i64_ST1Twov1d_s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+
+    ; CHECK-LABEL: name: v1i64_ST1Twov1d_s64
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:gpr64all = COPY $x0
+    ; CHECK: %src2:gpr64all = COPY $x1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:dd = REG_SEQUENCE %src1, %subreg.dsub0, %src2, %subreg.dsub1
+    ; CHECK: ST1Twov1d [[REG_SEQUENCE]], %ptr :: (store (<2 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:gpr(s64) = COPY $x0
+    %src2:gpr(s64) = COPY $x1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(s64), %src2(s64), %ptr(p0) :: (store (<2 x s64>))
+    RET_ReallyLR
+
+...
+---
+name: v1i64_ST1Twov1d_p0
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+
+    ; CHECK-LABEL: name: v1i64_ST1Twov1d_p0
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:gpr64all = COPY $x0
+    ; CHECK: %src2:gpr64all = COPY $x1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:dd = REG_SEQUENCE %src1, %subreg.dsub0, %src2, %subreg.dsub1
+    ; CHECK: ST1Twov1d [[REG_SEQUENCE]], %ptr :: (store (<2 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:gpr(p0) = COPY $x0
+    %src2:gpr(p0) = COPY $x1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(p0), %src2(p0), %ptr(p0) :: (store (<2 x s64>))
+    RET_ReallyLR
+
+...
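
Note (not part of the patch): the G_INTRINSIC_W_SIDE_EFFECTS forms in the tests above are roughly what the IRTranslator produces for an IR-level call to llvm.aarch64.neon.st2. A minimal IR sketch of such input is below; the function name st2_4s is made up for illustration, and the overload suffix assumes the typed-pointer mangling used at the time of this change:

; Running llc -mtriple=aarch64 -global-isel on IR like this exercises the new
; Intrinsic::aarch64_neon_st2 case, which selects ST2Twov4s for <4 x s32>.
define void @st2_4s(<4 x i32> %a, <4 x i32> %b, i8* %p) {
  call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> %a, <4 x i32> %b, i8* %p)
  ret void
}
declare void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32>, <4 x i32>, i8*)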