diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5057,6 +5057,11 @@
   if (!IntrinID)
     return false;
 
+  const LLT S8 = LLT::scalar(8);
+  const LLT S16 = LLT::scalar(16);
+  const LLT S32 = LLT::scalar(32);
+  const LLT S64 = LLT::scalar(64);
+  const LLT P0 = LLT::pointer(0, 64);
   // Select the instruction.
   switch (IntrinID) {
   default:
@@ -5081,16 +5086,54 @@
     MIB.buildInstr(AArch64::BRK, {}, {})
         .addImm(I.getOperand(1).getImm() | ('U' << 8));
     break;
+  case Intrinsic::aarch64_neon_ld2: {
+    Register Dst1 = I.getOperand(0).getReg();
+    Register Dst2 = I.getOperand(1).getReg();
+    Register Ptr = I.getOperand(3).getReg();
+    LLT Ty = MRI.getType(Dst1);
+    unsigned Opc = 0;
+    if (Ty == LLT::fixed_vector(8, S8))
+      Opc = AArch64::LD2Twov8b;
+    else if (Ty == LLT::fixed_vector(16, S8))
+      Opc = AArch64::LD2Twov16b;
+    else if (Ty == LLT::fixed_vector(4, S16))
+      Opc = AArch64::LD2Twov4h;
+    else if (Ty == LLT::fixed_vector(8, S16))
+      Opc = AArch64::LD2Twov8h;
+    else if (Ty == LLT::fixed_vector(2, S32))
+      Opc = AArch64::LD2Twov2s;
+    else if (Ty == LLT::fixed_vector(4, S32))
+      Opc = AArch64::LD2Twov4s;
+    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+      Opc = AArch64::LD2Twov2d;
+    else if (Ty == S64 || Ty == P0)
+      Opc = AArch64::LD1Twov1d;
+    else
+      llvm_unreachable("Unexpected type for ld2!");
+    unsigned SubReg =
+        Ty.getSizeInBits() == 64 ? AArch64::dsub0 : AArch64::qsub0;
+    // This will be selected as a load into a wide register, which is broken
+    // into two vector subregister copies.
+    auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
+    Load.cloneMemRefs(I);
+    constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
+    Register SelectedLoadDst = Load->getOperand(0).getReg();
+    // Emit the subreg copies and immediately select them.
+    // FIXME: We should refactor our copy code into an emitCopy helper and
+    // clean up uses of this pattern elsewhere in the selector.
+    auto Vec1 = MIB.buildInstr(TargetOpcode::COPY, {Dst1}, {})
+                    .addReg(SelectedLoadDst, 0, SubReg);
+    auto Vec2 = MIB.buildInstr(TargetOpcode::COPY, {Dst2}, {})
+                    .addReg(SelectedLoadDst, 0, SubReg + 1);
+    selectCopy(*Vec1, TII, MRI, TRI, RBI);
+    selectCopy(*Vec2, TII, MRI, TRI, RBI);
+    break;
+  }
   case Intrinsic::aarch64_neon_st2: {
     Register Src1 = I.getOperand(1).getReg();
     Register Src2 = I.getOperand(2).getReg();
     Register Ptr = I.getOperand(3).getReg();
     LLT Ty = MRI.getType(Src1);
-    const LLT S8 = LLT::scalar(8);
-    const LLT S16 = LLT::scalar(16);
-    const LLT S32 = LLT::scalar(32);
-    const LLT S64 = LLT::scalar(64);
-    const LLT P0 = LLT::pointer(0, 64);
     unsigned Opc;
     if (Ty == LLT::fixed_vector(8, S8))
       Opc = AArch64::ST2Twov8b;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-ld2.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-ld2.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-ld2.mir
@@ -0,0 +1,232 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+...
+---
+name: LD2Twov8b
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: LD2Twov8b
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: [[LD2Twov8b:%[0-9]+]]:dd = LD2Twov8b %ptr :: (load (<8 x s64>))
+    ; CHECK: %dst1:fpr64 = COPY [[LD2Twov8b]].dsub0
+    ; CHECK: %dst2:fpr64 = COPY [[LD2Twov8b]].dsub1
+    ; CHECK: $d0 = COPY %dst1
+    ; CHECK: $d1 = COPY %dst2
+    ; CHECK: RET_ReallyLR implicit $d0, implicit $d1
+    %ptr:gpr(p0) = COPY $x0
+    %dst1:fpr(<8 x s8>), %dst2:fpr(<8 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<8 x s64>))
+    $d0 = COPY %dst1(<8 x s8>)
+    $d1 = COPY %dst2(<8 x s8>)
+    RET_ReallyLR implicit $d0, implicit $d1
+...
+---
+name: LD2Twov16b
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: LD2Twov16b
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: [[LD2Twov16b:%[0-9]+]]:qq = LD2Twov16b %ptr :: (load (<16 x s64>))
+    ; CHECK: %dst1:fpr128 = COPY [[LD2Twov16b]].qsub0
+    ; CHECK: %dst2:fpr128 = COPY [[LD2Twov16b]].qsub1
+    ; CHECK: $q0 = COPY %dst1
+    ; CHECK: $q1 = COPY %dst2
+    ; CHECK: RET_ReallyLR implicit $q0, implicit $q1
+    %ptr:gpr(p0) = COPY $x0
+    %dst1:fpr(<16 x s8>), %dst2:fpr(<16 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<16 x s64>))
+    $q0 = COPY %dst1(<16 x s8>)
+    $q1 = COPY %dst2(<16 x s8>)
+    RET_ReallyLR implicit $q0, implicit $q1
+...
+---
+name: LD2Twov4h
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: LD2Twov4h
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: [[LD2Twov4h:%[0-9]+]]:dd = LD2Twov4h %ptr :: (load (<4 x s64>))
+    ; CHECK: %dst1:fpr64 = COPY [[LD2Twov4h]].dsub0
+    ; CHECK: %dst2:fpr64 = COPY [[LD2Twov4h]].dsub1
+    ; CHECK: $d0 = COPY %dst1
+    ; CHECK: $d1 = COPY %dst2
+    ; CHECK: RET_ReallyLR implicit $d0, implicit $d1
+    %ptr:gpr(p0) = COPY $x0
+    %dst1:fpr(<4 x s16>), %dst2:fpr(<4 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<4 x s64>))
+    $d0 = COPY %dst1(<4 x s16>)
+    $d1 = COPY %dst2(<4 x s16>)
+    RET_ReallyLR implicit $d0, implicit $d1
+...
+---
+name: LD2Twov8h
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: LD2Twov8h
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: [[LD2Twov8h:%[0-9]+]]:qq = LD2Twov8h %ptr :: (load (<8 x s64>))
+    ; CHECK: %dst1:fpr128 = COPY [[LD2Twov8h]].qsub0
+    ; CHECK: %dst2:fpr128 = COPY [[LD2Twov8h]].qsub1
+    ; CHECK: $q0 = COPY %dst1
+    ; CHECK: $q1 = COPY %dst2
+    ; CHECK: RET_ReallyLR implicit $q0, implicit $q1
+    %ptr:gpr(p0) = COPY $x0
+    %dst1:fpr(<8 x s16>), %dst2:fpr(<8 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<8 x s64>))
+    $q0 = COPY %dst1(<8 x s16>)
+    $q1 = COPY %dst2(<8 x s16>)
+    RET_ReallyLR implicit $q0, implicit $q1
+...
+---
+name: LD2Twov2s
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: LD2Twov2s
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: [[LD2Twov2s:%[0-9]+]]:dd = LD2Twov2s %ptr :: (load (<2 x s64>))
+    ; CHECK: %dst1:fpr64 = COPY [[LD2Twov2s]].dsub0
+    ; CHECK: %dst2:fpr64 = COPY [[LD2Twov2s]].dsub1
+    ; CHECK: $d0 = COPY %dst1
+    ; CHECK: $d1 = COPY %dst2
+    ; CHECK: RET_ReallyLR implicit $d0, implicit $d1
+    %ptr:gpr(p0) = COPY $x0
+    %dst1:fpr(<2 x s32>), %dst2:fpr(<2 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<2 x s64>))
+    $d0 = COPY %dst1(<2 x s32>)
+    $d1 = COPY %dst2(<2 x s32>)
+    RET_ReallyLR implicit $d0, implicit $d1
+...
+---
+name: LD2Twov4s
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: LD2Twov4s
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: [[LD2Twov4s:%[0-9]+]]:qq = LD2Twov4s %ptr :: (load (<4 x s64>))
+    ; CHECK: %dst1:fpr128 = COPY [[LD2Twov4s]].qsub0
+    ; CHECK: %dst2:fpr128 = COPY [[LD2Twov4s]].qsub1
+    ; CHECK: $q0 = COPY %dst1
+    ; CHECK: $q1 = COPY %dst2
+    ; CHECK: RET_ReallyLR implicit $q0, implicit $q1
+    %ptr:gpr(p0) = COPY $x0
+    %dst1:fpr(<4 x s32>), %dst2:fpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<4 x s64>))
+    $q0 = COPY %dst1(<4 x s32>)
+    $q1 = COPY %dst2(<4 x s32>)
+    RET_ReallyLR implicit $q0, implicit $q1
+...
+---
+name: LD2Twov2d_s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: LD2Twov2d_s64
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: [[LD2Twov2d:%[0-9]+]]:qq = LD2Twov2d %ptr :: (load (<2 x s64>))
+    ; CHECK: %dst1:fpr128 = COPY [[LD2Twov2d]].qsub0
+    ; CHECK: %dst2:fpr128 = COPY [[LD2Twov2d]].qsub1
+    ; CHECK: $q0 = COPY %dst1
+    ; CHECK: $q1 = COPY %dst2
+    ; CHECK: RET_ReallyLR implicit $q0, implicit $q1
+    %ptr:gpr(p0) = COPY $x0
+    %dst1:fpr(<2 x s64>), %dst2:fpr(<2 x s64>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<2 x s64>))
+    $q0 = COPY %dst1(<2 x s64>)
+    $q1 = COPY %dst2(<2 x s64>)
+    RET_ReallyLR implicit $q0, implicit $q1
+...
+---
+name: LD2Twov2d_p0
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: LD2Twov2d_p0
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: [[LD2Twov2d:%[0-9]+]]:qq = LD2Twov2d %ptr :: (load (<2 x p0>))
+    ; CHECK: %dst1:fpr128 = COPY [[LD2Twov2d]].qsub0
+    ; CHECK: %dst2:fpr128 = COPY [[LD2Twov2d]].qsub1
+    ; CHECK: $q0 = COPY %dst1
+    ; CHECK: $q1 = COPY %dst2
+    ; CHECK: RET_ReallyLR implicit $q0, implicit $q1
+    %ptr:gpr(p0) = COPY $x0
+    %dst1:fpr(<2 x p0>), %dst2:fpr(<2 x p0>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<2 x p0>))
+    $q0 = COPY %dst1(<2 x p0>)
+    $q1 = COPY %dst2(<2 x p0>)
+    RET_ReallyLR implicit $q0, implicit $q1
+...
+---
+name: LD1Twov1d_s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: LD1Twov1d_s64
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: [[LD1Twov1d:%[0-9]+]]:dd = LD1Twov1d %ptr :: (load (s64))
+    ; CHECK: %dst1:fpr64 = COPY [[LD1Twov1d]].dsub0
+    ; CHECK: %dst2:fpr64 = COPY [[LD1Twov1d]].dsub1
+    ; CHECK: $d0 = COPY %dst1
+    ; CHECK: $d1 = COPY %dst2
+    ; CHECK: RET_ReallyLR implicit $d0, implicit $d1
+    %ptr:gpr(p0) = COPY $x0
+    %dst1:fpr(s64), %dst2:fpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (s64))
+    $d0 = COPY %dst1(s64)
+    $d1 = COPY %dst2(s64)
+    RET_ReallyLR implicit $d0, implicit $d1
+...
+---
+name: LD1Twov1d_p0
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: LD1Twov1d_p0
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: [[LD1Twov1d:%[0-9]+]]:dd = LD1Twov1d %ptr :: (load (p0))
+    ; CHECK: %dst1:fpr64 = COPY [[LD1Twov1d]].dsub0
+    ; CHECK: %dst2:fpr64 = COPY [[LD1Twov1d]].dsub1
+    ; CHECK: $d0 = COPY %dst1
+    ; CHECK: $d1 = COPY %dst2
+    ; CHECK: RET_ReallyLR implicit $d0, implicit $d1
+    %ptr:gpr(p0) = COPY $x0
+    %dst1:fpr(p0), %dst2:fpr(p0) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (p0))
+    $d0 = COPY %dst1(p0)
+    $d1 = COPY %dst2(p0)
+    RET_ReallyLR implicit $d0, implicit $d1