Index: llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -399,7 +399,8 @@
           ExtendOp = TargetOpcode::G_ZEXT;
 
         LLT NewLLT(NewVT);
-        LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
+        MVT OldVT = MVT::getVT(CurArgInfo.Ty);
+        LLT OldLLT(OldVT);
         CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
         // Instead of an extend, we might have a vector type which needs
         // padding with more elements, e.g. <2 x half> -> <4 x half>.
@@ -410,10 +411,13 @@
             CurVReg =
                 MIRBuilder.buildPadVectorWithUndefElements(NewLLT, CurVReg)
                     .getReg(0);
-          } else {
+          } else if (OldVT.isInteger() && NewVT.isInteger()) {
             // Just do a vector extend.
             CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                           .getReg(0);
+          } else {
+            LLVM_DEBUG(dbgs() << "Could not handle float type\n");
+            return false;
           }
         } else if (NewLLT.getNumElements() >= 2 &&
                    NewLLT.getNumElements() <= 8) {
Index: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -167,6 +167,8 @@
   bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
   bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
                               MachineRegisterInfo &MRI);
+  MachineInstr *tryToFoldExtendOfVectorConstant(MachineInstr &MI, LLT DstTy,
+                                                MachineRegisterInfo &MRI);
   /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
   /// SUBREG_TO_REG.
   bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
@@ -3199,17 +3201,22 @@
       return true;
 
     const Register DstReg = I.getOperand(0).getReg();
-    const Register SrcReg = I.getOperand(1).getReg();
+    const LLT DstTy = MRI.getType(DstReg);
+    if (tryToFoldExtendOfVectorConstant(I, DstTy, MRI))
+      return true;
 
     const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
-    if (RBDst.getID() != AArch64::GPRRegBankID) {
+    const Register SrcReg = I.getOperand(1).getReg();
+    if (RBDst.getID() != AArch64::GPRRegBankID &&
+        !MRI.getType(DstReg).isVector()) {
       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
                         << ", expected: GPR\n");
       return false;
     }
 
     const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
-    if (RBSrc.getID() != AArch64::GPRRegBankID) {
+    if (RBSrc.getID() != AArch64::GPRRegBankID &&
+        !MRI.getType(SrcReg).isVector()) {
       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
                         << ", expected: GPR\n");
       return false;
@@ -5588,6 +5595,46 @@
   return true;
 }
 
+// Fold the vector const similar to SelectionDAG::FoldConstantArithmetic.
+MachineInstr *AArch64InstructionSelector::tryToFoldExtendOfVectorConstant(
+    MachineInstr &ExtI, LLT DstTy, MachineRegisterInfo &MRI) {
+  assert(ExtI.getOpcode() == TargetOpcode::G_ANYEXT);
+  const Register SrcReg = ExtI.getOperand(1).getReg();
+  MachineInstr *SrcMI = MRI.getUniqueVRegDef(SrcReg);
+  if (!SrcMI || SrcMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
+    return nullptr;
+  unsigned DstSize = DstTy.getSizeInBits();
+  assert(DstSize <= 128 && "Unexpected build_vec type!");
+  if (DstSize < 32)
+    return nullptr;
+  // Check if we're building a constant vector, in which case we want to
+  // generate a constant pool load instead of a vector insert sequence.
+  SmallVector<Constant *, 16> Csts;
+  LLT SVT = DstTy.getElementType();
+  unsigned OpTySize = DstTy.getScalarSizeInBits();
+  for (unsigned Idx = 1; Idx < SrcMI->getNumOperands(); ++Idx) {
+    // Try to find G_CONSTANT
+    auto *OpMI = getOpcodeDef(TargetOpcode::G_CONSTANT,
+                              SrcMI->getOperand(Idx).getReg(), MRI);
+    if (!OpMI)
+      return nullptr;
+
+    // Extend the type for each const value element.
+    APInt Immed =
+        APInt(OpTySize, OpMI->getOperand(1).getCImm()->getZExtValue());
+    auto C = MIB.buildConstant(SVT, Immed);
+    MRI.setRegClass(C.getReg(0), &AArch64::GPR64RegClass);
+    Csts.emplace_back(const_cast<ConstantInt *>(C->getOperand(1).getCImm()));
+  }
+  Constant *CV = ConstantVector::get(Csts);
+  MachineInstr *CVec =
+      emitConstantVector(ExtI.getOperand(0).getReg(), CV, MIB, MRI);
+  if (!CVec)
+    return nullptr;
+  ExtI.eraseFromParent();
+  return CVec;
+}
+
 bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
     MachineInstr &I, MachineRegisterInfo &MRI) {
   // Given:
Index: llvm/test/CodeGen/AArch64/extract-sext-zext.ll
===================================================================
--- llvm/test/CodeGen/AArch64/extract-sext-zext.ll
+++ llvm/test/CodeGen/AArch64/extract-sext-zext.ll
@@ -494,3 +494,12 @@
   %u = xor i64 %s, %t
   ret i64 %u
 }
+
+define <2 x i16> @extend_v2i16() {
+; CHECK-LABEL: extend_v2i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI42_0
+; CHECK-NEXT:    ldr d0, [x8, :lo12:.LCPI42_0]
+; CHECK-NEXT:    ret
+  ret <2 x i16> <i16 123, i16 124>
+}