diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -164,6 +164,9 @@
   bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
   bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
                               MachineRegisterInfo &MRI);
+  /// Try to select the G_BUILD_VECTOR \p MI as a SUBREG_TO_REG.
+  /// \returns true if \p MI was selected and erased; otherwise it is untouched.
+  bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
   bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
   bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
   bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -4963,6 +4966,55 @@
   return true;
 }
 
+bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
+    MachineInstr &I, MachineRegisterInfo &MRI) {
+  // Given:
+  //  %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
+  //
+  // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
+  //
+  // This must not modify anything unless it returns true: on failure the
+  // caller carries on selecting I, so I has to still be in the function.
+  Register Dst = I.getOperand(0).getReg();
+  Register EltReg = I.getOperand(1).getReg();
+  LLT EltTy = MRI.getType(EltReg);
+  // If the vector isn't on the same bank as its elements, then this can't be a
+  // SUBREG_TO_REG.
+  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
+  const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
+  if (EltRB != DstRB)
+    return false;
+  // Every element other than the first must be undef.
+  if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
+             [&MRI](const MachineOperand &Op) {
+               return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
+                                    MRI);
+             }))
+    return false;
+  const TargetRegisterClass *EltRC =
+      getMinClassForRegBank(EltRB, EltTy.getSizeInBits());
+  if (!EltRC)
+    return false;
+  const TargetRegisterClass *DstRC =
+      getMinClassForRegBank(DstRB, MRI.getType(Dst).getSizeInBits());
+  if (!DstRC)
+    return false;
+  unsigned SubReg;
+  if (!getSubRegForClass(EltRC, TRI, SubReg))
+    return false;
+  // Constrain the destination before touching any instruction. Nothing after
+  // this point returns false, so bailing out here still leaves I in place.
+  if (!RBI.constrainGenericRegister(Dst, *DstRC, MRI))
+    return false;
+  auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
+                         .addImm(0)
+                         .addUse(EltReg)
+                         .addImm(SubReg);
+  I.eraseFromParent();
+  constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
+  return true;
+}
+
 bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
                                                    MachineRegisterInfo &MRI) {
   assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
@@ -4974,6 +5026,9 @@
   if (tryOptConstantBuildVec(I, DstTy, MRI))
     return true;
 
+  if (tryOptBuildVecToSubregToReg(I, MRI))
+    return true;
+
   if (EltSize < 16 || EltSize > 64)
     return false; // Don't support all element types yet.
   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir
@@ -228,3 +228,54 @@
     $d0 = COPY %1(<8 x s8>)
     RET_ReallyLR
 ...
+---
+name:            undef_elts_to_subreg_to_reg
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $s0
+    ; We have a BUILD_VECTOR whose 0th element is a subregister of the wide
+    ; register class. Everything else is undef. This is a SUBREG_TO_REG.
+
+    ; CHECK-LABEL: name: undef_elts_to_subreg_to_reg
+    ; CHECK: liveins: $s0
+    ; CHECK: %val:fpr32 = COPY $s0
+    ; CHECK: %bv:fpr128 = SUBREG_TO_REG 0, %val, %subreg.ssub
+    ; CHECK: $q0 = COPY %bv
+    ; CHECK: RET_ReallyLR implicit $q0
+    %val:fpr(s32) = COPY $s0
+    %undef:fpr(s32) = G_IMPLICIT_DEF
+    %bv:fpr(<4 x s32>) = G_BUILD_VECTOR %val(s32), %undef(s32), %undef(s32), %undef(s32)
+    $q0 = COPY %bv(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...
+---
+name:            undef_elts_different_regbanks
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $w0
+    ; Element is not a subregister of the wide register class. This is not a
+    ; SUBREG_TO_REG.
+
+    ; CHECK-LABEL: name: undef_elts_different_regbanks
+    ; CHECK: liveins: $w0
+    ; CHECK: %val:gpr32all = COPY $w0
+    ; CHECK: %undef:gpr32 = IMPLICIT_DEF
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %val, %subreg.ssub
+    ; CHECK: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, %undef
+    ; CHECK: [[INSvi32gpr1:%[0-9]+]]:fpr128 = INSvi32gpr [[INSvi32gpr]], 2, %undef
+    ; CHECK: %bv:fpr128 = INSvi32gpr [[INSvi32gpr1]], 3, %undef
+    ; CHECK: $q0 = COPY %bv
+    ; CHECK: RET_ReallyLR implicit $q0
+    %val:gpr(s32) = COPY $w0
+    %undef:gpr(s32) = G_IMPLICIT_DEF
+    %bv:fpr(<4 x s32>) = G_BUILD_VECTOR %val(s32), %undef(s32), %undef(s32), %undef(s32)
+    $q0 = COPY %bv(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -572,10 +572,7 @@
 ; GISEL-LABEL: float_vrev64:
 ; GISEL:       // %bb.0: // %entry
 ; GISEL-NEXT:    movi d0, #0000000000000000
-; GISEL-NEXT:    mov.s v0[1], v0[0]
-; GISEL-NEXT:    mov.s v0[2], v0[0]
 ; GISEL-NEXT:    adrp x8, .LCPI28_0
-; GISEL-NEXT:    mov.s v0[3], v0[0]
 ; GISEL-NEXT:    ldr q1, [x0]
 ; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI28_0]
 ; GISEL-NEXT:    tbl.16b v0, { v0, v1 }, v2