Index: llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -399,7 +399,8 @@
             ExtendOp = TargetOpcode::G_ZEXT;
 
           LLT NewLLT(NewVT);
-          LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
+          MVT OldVT = MVT::getVT(CurArgInfo.Ty);
+          LLT OldLLT(OldVT);
           CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
           // Instead of an extend, we might have a vector type which needs
           // padding with more elements, e.g. <2 x half> -> <4 x half>.
@@ -410,10 +411,13 @@
                 CurVReg =
                     MIRBuilder.buildPadVectorWithUndefElements(NewLLT, CurVReg)
                         .getReg(0);
-              } else {
+              } else if (OldVT.isInteger() && NewVT.isInteger()) {
                 // Just do a vector extend.
                 CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                               .getReg(0);
+              } else {
+                LLVM_DEBUG(dbgs() << "Could not handle float type\n");
+                return false;
               }
             } else if (NewLLT.getNumElements() >= 2 &&
                        NewLLT.getNumElements() <= 8) {
Index: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -167,6 +167,8 @@
   bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
   bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
                               MachineRegisterInfo &MRI);
+  MachineInstr *tryToFoldExtendOfVectorConstant(MachineInstr &MI, LLT DstTy,
+                                                MachineRegisterInfo &MRI);
   /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
   /// SUBREG_TO_REG.
   bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
@@ -3199,18 +3201,23 @@
       return true;
 
     const Register DstReg = I.getOperand(0).getReg();
-    const Register SrcReg = I.getOperand(1).getReg();
+    const LLT DstTy = MRI.getType(DstReg);
+    if (tryToFoldExtendOfVectorConstant(I, DstTy, MRI))
+      return true;
 
     const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
-    if (RBDst.getID() != AArch64::GPRRegBankID) {
-      LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
+    const Register SrcReg = I.getOperand(1).getReg();
+    if (RBDst.getID() != AArch64::GPRRegBankID &&
+        !MRI.getType(DstReg).isVector()) {
+      LLVM_DEBUG(dbgs() << "Scalar G_ANYEXT on bank: " << RBDst
                         << ", expected: GPR\n");
       return false;
     }
 
     const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
-    if (RBSrc.getID() != AArch64::GPRRegBankID) {
-      LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
+    if (RBSrc.getID() != AArch64::GPRRegBankID &&
+        !MRI.getType(SrcReg).isVector()) {
+      LLVM_DEBUG(dbgs() << "Scalar G_ANYEXT on bank: " << RBSrc
                         << ", expected: GPR\n");
       return false;
     }
@@ -5588,6 +5595,55 @@
   return true;
 }
 
+/// Try to fold a G_ANYEXT of an all-constant G_BUILD_VECTOR into a constant
+/// vector, similar to SelectionDAG::FoldConstantArithmetic.
+///
+/// Each G_CONSTANT element is zero-extended to the element width of \p DstTy
+/// and the resulting vector is materialized with emitConstantVector.
+///
+/// \returns the instruction produced by emitConstantVector, in which case
+/// \p ExtI has been erased, or nullptr if the fold was not performed.
+MachineInstr *AArch64InstructionSelector::tryToFoldExtendOfVectorConstant(
+    MachineInstr &ExtI, LLT DstTy, MachineRegisterInfo &MRI) {
+  assert(ExtI.getOpcode() == TargetOpcode::G_ANYEXT);
+  const Register SrcReg = ExtI.getOperand(1).getReg();
+  MachineInstr *SrcMI = MRI.getUniqueVRegDef(SrcReg);
+  // getUniqueVRegDef yields null unless SrcReg has exactly one definition.
+  if (!SrcMI || SrcMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
+    return nullptr;
+  unsigned DstSize = DstTy.getSizeInBits();
+  assert(DstSize <= 128 && "Unexpected build_vec type!");
+  // Check for legalize vector type
+  if (DstSize < 32)
+    return nullptr;
+  // Check if we're building a constant vector, in which case we want to
+  // generate a constant pool load instead of a vector insert sequence.
+  SmallVector<Constant *> Csts;
+  LLVMContext &Ctx = ExtI.getMF()->getFunction().getContext();
+  unsigned OpTySize = DstTy.getScalarSizeInBits();
+  for (unsigned Idx = 1; Idx < SrcMI->getNumOperands(); ++Idx) {
+    // Try to find G_CONSTANT
+    auto *OpMI = getOpcodeDef(TargetOpcode::G_CONSTANT,
+                              SrcMI->getOperand(Idx).getReg(), MRI);
+    // Look for G_BUILD_VECTORs with all constant source operands
+    if (!OpMI)
+      return nullptr;
+
+    // Extend the type for each const value element. The ConstantInt is
+    // created directly, so no temporary G_CONSTANT has to be emitted.
+    APInt Immed =
+        APInt(OpTySize, OpMI->getOperand(1).getCImm()->getZExtValue());
+    Csts.emplace_back(ConstantInt::get(Ctx, Immed));
+  }
+  Constant *CV = ConstantVector::get(Csts);
+  // Try to replace ExtI with a constant vector.
+  MachineInstr *MaybeCVec =
+      emitConstantVector(ExtI.getOperand(0).getReg(), CV, MIB, MRI);
+  if (MaybeCVec)
+    ExtI.eraseFromParent();
+  return MaybeCVec;
+}
+
 bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
     MachineInstr &I, MachineRegisterInfo &MRI) {
   // Given:
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vector-const.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vector-const.mir
@@ -0,0 +1,34 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+--- |
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64-unknown-unknown-eabi"
+
+  define <2 x i16> @extend_v2i16() {
+    ret <2 x i16> <i16 0, i16 1>
+  }
+...
+---
+name: extend_v2i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr, preferred-register: '' }
+  - { id: 1, class: gpr, preferred-register: '' }
+  - { id: 2, class: gpr, preferred-register: '' }
+  - { id: 3, class: fpr, preferred-register: '' }
+body: |
+  bb.1 (%ir-block.0):
+    ; CHECK-LABEL: name: extend_v2i16
+    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
+    ; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load (s64) from constant-pool)
+    ; CHECK-NEXT: $d0 = COPY [[LDRDui]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %2:gpr(s16) = G_CONSTANT i16 1
+    %1:gpr(s16) = G_CONSTANT i16 0
+    %0:fpr(<2 x s16>) = G_BUILD_VECTOR %1(s16), %2(s16)
+    %3:fpr(<2 x s32>) = G_ANYEXT %0(<2 x s16>)
+    $d0 = COPY %3(<2 x s32>)
+    RET_ReallyLR implicit $d0
+...
Index: llvm/test/CodeGen/AArch64/extract-sext-zext.ll
===================================================================
--- llvm/test/CodeGen/AArch64/extract-sext-zext.ll
+++ llvm/test/CodeGen/AArch64/extract-sext-zext.ll
@@ -494,3 +494,12 @@
   %u = xor i64 %s, %t
   ret i64 %u
 }
+
+define <2 x i16> @extend_v2i16() {
+; CHECK-LABEL: extend_v2i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI42_0
+; CHECK-NEXT:    ldr d0, [x8, :lo12:.LCPI42_0]
+; CHECK-NEXT:    ret
+  ret <2 x i16> <i16 0, i16 1>
+}