diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index bfefd7956157..3f6d0e23c685 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -1,833 +1,817 @@ //===--- CGDeclCXX.cpp - Emit LLVM Code for C++ declarations --------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This contains code dealing with code generation of C++ declarations // //===----------------------------------------------------------------------===// #include "CGCXXABI.h" #include "CGObjCRuntime.h" #include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "TargetInfo.h" #include "clang/AST/Attr.h" #include "clang/Basic/LangOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/Support/Path.h" using namespace clang; using namespace CodeGen; static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D, ConstantAddress DeclPtr) { assert( (D.hasGlobalStorage() || (D.hasLocalStorage() && CGF.getContext().getLangOpts().OpenCLCPlusPlus)) && "VarDecl must have global or local (in the case of OpenCL) storage!"); assert(!D.getType()->isReferenceType() && "Should not call EmitDeclInit on a reference!"); QualType type = D.getType(); LValue lv = CGF.MakeAddrLValue(DeclPtr, type); const Expr *Init = D.getInit(); switch (CGF.getEvaluationKind(type)) { case TEK_Scalar: { CodeGenModule &CGM = CGF.CGM; if (lv.isObjCStrong()) CGM.getObjCRuntime().EmitObjCGlobalAssign(CGF, CGF.EmitScalarExpr(Init), DeclPtr, D.getTLSKind()); else if (lv.isObjCWeak()) CGM.getObjCRuntime().EmitObjCWeakAssign(CGF, CGF.EmitScalarExpr(Init), DeclPtr); else CGF.EmitScalarInit(Init, &D, lv, false); return; } case TEK_Complex: CGF.EmitComplexExprIntoLValue(Init, lv, /*isInit*/ true); return; case TEK_Aggregate: CGF.EmitAggExpr(Init, AggValueSlot::forLValue(lv, CGF, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, AggValueSlot::DoesNotOverlap)); return; } llvm_unreachable("bad evaluation kind"); } /// Emit code to cause the destruction of the given variable with /// static storage duration. static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, ConstantAddress Addr) { // Honor __attribute__((no_destroy)) and bail instead of attempting // to emit a reference to a possibly nonexistent destructor, which // in turn can cause a crash. This will result in a global constructor // that isn't balanced out by a destructor call as intended by the // attribute. This also checks for -fno-c++-static-destructors and // bails even if the attribute is not present. QualType::DestructionKind DtorKind = D.needsDestruction(CGF.getContext()); // FIXME: __attribute__((cleanup)) ? switch (DtorKind) { case QualType::DK_none: return; case QualType::DK_cxx_destructor: break; case QualType::DK_objc_strong_lifetime: case QualType::DK_objc_weak_lifetime: case QualType::DK_nontrivial_c_struct: // We don't care about releasing objects during process teardown. assert(!D.getTLSKind() && "should have rejected this"); return; } llvm::FunctionCallee Func; llvm::Constant *Argument; CodeGenModule &CGM = CGF.CGM; QualType Type = D.getType(); // Special-case non-array C++ destructors, if they have the right signature. 
// Under some ABIs, destructors return this instead of void, and cannot be // passed directly to __cxa_atexit if the target does not allow this // mismatch. const CXXRecordDecl *Record = Type->getAsCXXRecordDecl(); bool CanRegisterDestructor = Record && (!CGM.getCXXABI().HasThisReturn( GlobalDecl(Record->getDestructor(), Dtor_Complete)) || CGM.getCXXABI().canCallMismatchedFunctionType()); // If __cxa_atexit is disabled via a flag, a different helper function is // generated elsewhere which uses atexit instead, and it takes the destructor // directly. bool UsingExternalHelper = !CGM.getCodeGenOpts().CXAAtExit; if (Record && (CanRegisterDestructor || UsingExternalHelper)) { assert(!Record->hasTrivialDestructor()); CXXDestructorDecl *Dtor = Record->getDestructor(); Func = CGM.getAddrAndTypeOfCXXStructor(GlobalDecl(Dtor, Dtor_Complete)); if (CGF.getContext().getLangOpts().OpenCL) { auto DestAS = CGM.getTargetCodeGenInfo().getAddrSpaceOfCxaAtexitPtrParam(); auto DestTy = CGF.getTypes().ConvertType(Type)->getPointerTo( CGM.getContext().getTargetAddressSpace(DestAS)); auto SrcAS = D.getType().getQualifiers().getAddressSpace(); if (DestAS == SrcAS) Argument = llvm::ConstantExpr::getBitCast(Addr.getPointer(), DestTy); else // FIXME: On addr space mismatch we are passing NULL. The generation // of the global destructor function should be adjusted accordingly. Argument = llvm::ConstantPointerNull::get(DestTy); } else { Argument = llvm::ConstantExpr::getBitCast( Addr.getPointer(), CGF.getTypes().ConvertType(Type)->getPointerTo()); } // Otherwise, the standard logic requires a helper function. } else { Func = CodeGenFunction(CGM) .generateDestroyHelper(Addr, Type, CGF.getDestroyer(DtorKind), CGF.needsEHCleanup(DtorKind), &D); Argument = llvm::Constant::getNullValue(CGF.Int8PtrTy); } CGM.getCXXABI().registerGlobalDtor(CGF, D, Func, Argument); } /// Emit code to cause the variable at the given address to be considered as /// constant from this point onwards. static void EmitDeclInvariant(CodeGenFunction &CGF, const VarDecl &D, llvm::Constant *Addr) { return CGF.EmitInvariantStart( Addr, CGF.getContext().getTypeSizeInChars(D.getType())); } void CodeGenFunction::EmitInvariantStart(llvm::Constant *Addr, CharUnits Size) { // Do not emit the intrinsic if we're not optimizing. if (!CGM.getCodeGenOpts().OptimizationLevel) return; // Grab the llvm.invariant.start intrinsic. llvm::Intrinsic::ID InvStartID = llvm::Intrinsic::invariant_start; // Overloaded address space type. llvm::Type *ObjectPtr[1] = {Int8PtrTy}; llvm::Function *InvariantStart = CGM.getIntrinsic(InvStartID, ObjectPtr); // Emit a call with the size in bytes of the object. uint64_t Width = Size.getQuantity(); llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(Int64Ty, Width), llvm::ConstantExpr::getBitCast(Addr, Int8PtrTy)}; Builder.CreateCall(InvariantStart, Args); } void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D, llvm::Constant *DeclPtr, bool PerformInit) { const Expr *Init = D.getInit(); QualType T = D.getType(); // The address space of a static local variable (DeclPtr) may be different // from the address space of the "this" argument of the constructor. In that // case, we need an addrspacecast before calling the constructor. // // struct StructWithCtor { // __device__ StructWithCtor() {...} // }; // __device__ void foo() { // __shared__ StructWithCtor s; // ... 
// } // // For example, in the above CUDA code, the static local variable s has a // "shared" address space qualifier, but the constructor of StructWithCtor // expects "this" in the "generic" address space. unsigned ExpectedAddrSpace = getContext().getTargetAddressSpace(T); unsigned ActualAddrSpace = DeclPtr->getType()->getPointerAddressSpace(); if (ActualAddrSpace != ExpectedAddrSpace) { llvm::Type *LTy = CGM.getTypes().ConvertTypeForMem(T); llvm::PointerType *PTy = llvm::PointerType::get(LTy, ExpectedAddrSpace); DeclPtr = llvm::ConstantExpr::getAddrSpaceCast(DeclPtr, PTy); } ConstantAddress DeclAddr(DeclPtr, getContext().getDeclAlign(&D)); if (!T->isReferenceType()) { if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd && D.hasAttr()) { (void)CGM.getOpenMPRuntime().emitThreadPrivateVarDefinition( &D, DeclAddr, D.getAttr()->getLocation(), PerformInit, this); } if (PerformInit) EmitDeclInit(*this, D, DeclAddr); if (CGM.isTypeConstant(D.getType(), true)) EmitDeclInvariant(*this, D, DeclPtr); else EmitDeclDestroy(*this, D, DeclAddr); return; } assert(PerformInit && "cannot have constant initializer which needs " "destruction for reference"); RValue RV = EmitReferenceBindingToExpr(Init); EmitStoreOfScalar(RV.getScalarVal(), DeclAddr, false, T); } /// Create a stub function, suitable for being passed to atexit, /// which passes the given address to the given destructor function. llvm::Function *CodeGenFunction::createAtExitStub(const VarDecl &VD, llvm::FunctionCallee dtor, llvm::Constant *addr) { // Get the destructor function type, void(*)(void). llvm::FunctionType *ty = llvm::FunctionType::get(CGM.VoidTy, false); SmallString<256> FnName; { llvm::raw_svector_ostream Out(FnName); CGM.getCXXABI().getMangleContext().mangleDynamicAtExitDestructor(&VD, Out); } const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); llvm::Function *fn = CGM.CreateGlobalInitOrCleanUpFunction( ty, FnName.str(), FI, VD.getLocation()); CodeGenFunction CGF(CGM); CGF.StartFunction(GlobalDecl(&VD, DynamicInitKind::AtExit), CGM.getContext().VoidTy, fn, FI, FunctionArgList(), VD.getLocation(), VD.getInit()->getExprLoc()); llvm::CallInst *call = CGF.Builder.CreateCall(dtor, addr); // Make sure the call and the callee agree on calling convention. if (auto *dtorFn = dyn_cast( dtor.getCallee()->stripPointerCastsAndAliases())) call->setCallingConv(dtorFn->getCallingConv()); CGF.FinishFunction(); return fn; } /// Register a global destructor using the C atexit runtime function. void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD, llvm::FunctionCallee dtor, llvm::Constant *addr) { // Create a function which calls the destructor. 
llvm::Constant *dtorStub = createAtExitStub(VD, dtor, addr); registerGlobalDtorWithAtExit(dtorStub); } void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtorStub) { // extern "C" int atexit(void (*f)(void)); assert(cast(dtorStub)->getFunctionType() == llvm::FunctionType::get(CGM.VoidTy, false) && "Argument to atexit has a wrong type."); llvm::FunctionType *atexitTy = llvm::FunctionType::get(IntTy, dtorStub->getType(), false); llvm::FunctionCallee atexit = CGM.CreateRuntimeFunction(atexitTy, "atexit", llvm::AttributeList(), /*Local=*/true); if (llvm::Function *atexitFn = dyn_cast(atexit.getCallee())) atexitFn->setDoesNotThrow(); EmitNounwindRuntimeCall(atexit, dtorStub); } llvm::Value * CodeGenFunction::unregisterGlobalDtorWithUnAtExit(llvm::Function *dtorStub) { // The unatexit subroutine unregisters __dtor functions that were previously // registered by the atexit subroutine. If the referenced function is found, // it is removed from the list of functions that are called at normal program // termination and the unatexit returns a value of 0, otherwise a non-zero // value is returned. // // extern "C" int unatexit(void (*f)(void)); assert(dtorStub->getFunctionType() == llvm::FunctionType::get(CGM.VoidTy, false) && "Argument to unatexit has a wrong type."); llvm::FunctionType *unatexitTy = llvm::FunctionType::get(IntTy, {dtorStub->getType()}, /*isVarArg=*/false); llvm::FunctionCallee unatexit = CGM.CreateRuntimeFunction(unatexitTy, "unatexit", llvm::AttributeList()); cast(unatexit.getCallee())->setDoesNotThrow(); return EmitNounwindRuntimeCall(unatexit, dtorStub); } void CodeGenFunction::EmitCXXGuardedInit(const VarDecl &D, llvm::GlobalVariable *DeclPtr, bool PerformInit) { // If we've been asked to forbid guard variables, emit an error now. // This diagnostic is hard-coded for Darwin's use case; we can find // better phrasing if someone else needs it. if (CGM.getCodeGenOpts().ForbidGuardVariables) CGM.Error(D.getLocation(), "this initialization requires a guard variable, which " "the kernel does not support"); CGM.getCXXABI().EmitGuardedInit(*this, D, DeclPtr, PerformInit); } void CodeGenFunction::EmitCXXGuardedInitBranch(llvm::Value *NeedsInit, llvm::BasicBlock *InitBlock, llvm::BasicBlock *NoInitBlock, GuardKind Kind, const VarDecl *D) { assert((Kind == GuardKind::TlsGuard || D) && "no guarded variable"); // A guess at how many times we will enter the initialization of a // variable, depending on the kind of variable. static const uint64_t InitsPerTLSVar = 1024; static const uint64_t InitsPerLocalVar = 1024 * 1024; llvm::MDNode *Weights; if (Kind == GuardKind::VariableGuard && !D->isLocalVarDecl()) { // For non-local variables, don't apply any weighting for now. Due to our // use of COMDATs, we expect there to be at most one initialization of the // variable per DSO, but we have no way to know how many DSOs will try to // initialize the variable. Weights = nullptr; } else { uint64_t NumInits; // FIXME: For the TLS case, collect and use profiling information to // determine a more accurate brach weight. if (Kind == GuardKind::TlsGuard || D->getTLSKind()) NumInits = InitsPerTLSVar; else NumInits = InitsPerLocalVar; // The probability of us entering the initializer is // 1 / (total number of times we attempt to initialize the variable). 
    llvm::MDBuilder MDHelper(CGM.getLLVMContext());
    Weights = MDHelper.createBranchWeights(1, NumInits - 1);
  }

  Builder.CreateCondBr(NeedsInit, InitBlock, NoInitBlock, Weights);
}

llvm::Function *CodeGenModule::CreateGlobalInitOrCleanUpFunction(
    llvm::FunctionType *FTy, const Twine &Name, const CGFunctionInfo &FI,
    SourceLocation Loc, bool TLS) {
  llvm::Function *Fn = llvm::Function::Create(
      FTy, llvm::GlobalValue::InternalLinkage, Name, &getModule());

  if (!getLangOpts().AppleKext && !TLS) {
    // Set the section if needed.
    if (const char *Section = getTarget().getStaticInitSectionSpecifier())
      Fn->setSection(Section);
  }

  SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);

  Fn->setCallingConv(getRuntimeCC());

  if (!getLangOpts().Exceptions)
    Fn->setDoesNotThrow();

  if (getLangOpts().Sanitize.has(SanitizerKind::Address) &&
      !isInSanitizerBlacklist(SanitizerKind::Address, Fn, Loc))
    Fn->addFnAttr(llvm::Attribute::SanitizeAddress);

  if (getLangOpts().Sanitize.has(SanitizerKind::KernelAddress) &&
      !isInSanitizerBlacklist(SanitizerKind::KernelAddress, Fn, Loc))
    Fn->addFnAttr(llvm::Attribute::SanitizeAddress);

  if (getLangOpts().Sanitize.has(SanitizerKind::HWAddress) &&
      !isInSanitizerBlacklist(SanitizerKind::HWAddress, Fn, Loc))
    Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress);

  if (getLangOpts().Sanitize.has(SanitizerKind::KernelHWAddress) &&
      !isInSanitizerBlacklist(SanitizerKind::KernelHWAddress, Fn, Loc))
    Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress);

  if (getLangOpts().Sanitize.has(SanitizerKind::MemTag) &&
      !isInSanitizerBlacklist(SanitizerKind::MemTag, Fn, Loc))
    Fn->addFnAttr(llvm::Attribute::SanitizeMemTag);

  if (getLangOpts().Sanitize.has(SanitizerKind::Thread) &&
      !isInSanitizerBlacklist(SanitizerKind::Thread, Fn, Loc))
    Fn->addFnAttr(llvm::Attribute::SanitizeThread);

  if (getLangOpts().Sanitize.has(SanitizerKind::Memory) &&
      !isInSanitizerBlacklist(SanitizerKind::Memory, Fn, Loc))
    Fn->addFnAttr(llvm::Attribute::SanitizeMemory);

  if (getLangOpts().Sanitize.has(SanitizerKind::KernelMemory) &&
      !isInSanitizerBlacklist(SanitizerKind::KernelMemory, Fn, Loc))
    Fn->addFnAttr(llvm::Attribute::SanitizeMemory);

  if (getLangOpts().Sanitize.has(SanitizerKind::SafeStack) &&
      !isInSanitizerBlacklist(SanitizerKind::SafeStack, Fn, Loc))
    Fn->addFnAttr(llvm::Attribute::SafeStack);

  if (getLangOpts().Sanitize.has(SanitizerKind::ShadowCallStack) &&
      !isInSanitizerBlacklist(SanitizerKind::ShadowCallStack, Fn, Loc))
    Fn->addFnAttr(llvm::Attribute::ShadowCallStack);

-  auto RASignKind = getLangOpts().getSignReturnAddressScope();
-  if (RASignKind != LangOptions::SignReturnAddressScopeKind::None) {
-    Fn->addFnAttr("sign-return-address",
-                  RASignKind == LangOptions::SignReturnAddressScopeKind::All
-                      ? "all"
-                      : "non-leaf");
-    auto RASignKey = getLangOpts().getSignReturnAddressKey();
-    Fn->addFnAttr("sign-return-address-key",
-                  RASignKey == LangOptions::SignReturnAddressKeyKind::AKey
-                      ? "a_key"
-                      : "b_key");
-  }
-
-  if (getLangOpts().BranchTargetEnforcement)
-    Fn->addFnAttr("branch-target-enforcement");
-
  return Fn;
}
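The hunk above drops the per-function "sign-return-address" / "branch-target-enforcement" attributes for these synthesized init and cleanup functions; the CodeGenModule.cpp hunk later in this patch records the same settings once per module as IR module flags instead. Purely as an illustrative sketch (not part of the patch), the snippet below shows how a consumer of the produced IR could read those flags back. The helper names readBoolModuleFlag and dumpBranchProtectionFlags are invented for this example; llvm::Module::getModuleFlag and llvm::mdconst::extract_or_null are existing LLVM APIs.

#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical helper: a missing flag or a zero value reads as "off".
static bool readBoolModuleFlag(const llvm::Module &M, llvm::StringRef Key) {
  auto *CI =
      llvm::mdconst::extract_or_null<llvm::ConstantInt>(M.getModuleFlag(Key));
  return CI && !CI->isZero();
}

// Hypothetical helper: print the four flags this patch introduces.
static void dumpBranchProtectionFlags(const llvm::Module &M) {
  for (llvm::StringRef Key :
       {"branch-target-enforcement", "sign-return-address",
        "sign-return-address-all", "sign-return-address-with-bkey"})
    llvm::errs() << Key << ": "
                 << (readBoolModuleFlag(M, Key) ? "on" : "off") << "\n";
}

Since the patch registers these flags with llvm::Module::Error merge behavior, modules built with conflicting branch-protection settings should be diagnosed when IR modules are merged (for example under LTO) rather than silently resolved.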
/// Create a global pointer to a function that will initialize a global
/// variable. The user has requested that this pointer be emitted in a specific
/// section.
void CodeGenModule::EmitPointerToInitFunc(const VarDecl *D,
                                          llvm::GlobalVariable *GV,
                                          llvm::Function *InitFunc,
                                          InitSegAttr *ISA) {
  llvm::GlobalVariable *PtrArray = new llvm::GlobalVariable(
      TheModule, InitFunc->getType(), /*isConstant=*/true,
      llvm::GlobalValue::PrivateLinkage, InitFunc, "__cxx_init_fn_ptr");
  PtrArray->setSection(ISA->getSection());
  addUsedGlobal(PtrArray);

  // If the GV is already in a comdat group, then we have to join it.
  if (llvm::Comdat *C = GV->getComdat())
    PtrArray->setComdat(C);
}

void CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
                                                 llvm::GlobalVariable *Addr,
                                                 bool PerformInit) {
  // According to E.2.3.1 in CUDA-7.5 Programming guide: __device__,
  // __constant__ and __shared__ variables defined in namespace scope,
  // that are of class type, cannot have a non-empty constructor. All
  // the checks have been done in Sema by now. Whatever initializers
  // are allowed are empty and we just need to ignore them here.
  if (getLangOpts().CUDAIsDevice && !getLangOpts().GPUAllowDeviceInit &&
      (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
       D->hasAttr<CUDASharedAttr>()))
    return;
  if (getLangOpts().OpenMP &&
      getOpenMPRuntime().emitDeclareTargetVarDefinition(D, Addr, PerformInit))
    return;

  // Check if we've already initialized this decl.
  auto I = DelayedCXXInitPosition.find(D);
  if (I != DelayedCXXInitPosition.end() && I->second == ~0U)
    return;

  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
  SmallString<256> FnName;
  {
    llvm::raw_svector_ostream Out(FnName);
    getCXXABI().getMangleContext().mangleDynamicInitializer(D, Out);
  }

  // Create a variable initialization function.
  llvm::Function *Fn = CreateGlobalInitOrCleanUpFunction(
      FTy, FnName.str(), getTypes().arrangeNullaryFunction(), D->getLocation());

  auto *ISA = D->getAttr<InitSegAttr>();
  CodeGenFunction(*this).GenerateCXXGlobalVarDeclInitFunc(Fn, D, Addr,
                                                          PerformInit);

  llvm::GlobalVariable *COMDATKey =
      supportsCOMDAT() && D->isExternallyVisible() ? Addr : nullptr;

  if (D->getTLSKind()) {
    // FIXME: Should we support init_priority for thread_local?
    // FIXME: We only need to register one __cxa_thread_atexit function for the
    // entire TU.
    CXXThreadLocalInits.push_back(Fn);
    CXXThreadLocalInitVars.push_back(D);
  } else if (PerformInit && ISA) {
    EmitPointerToInitFunc(D, Addr, Fn, ISA);
  } else if (auto *IPA = D->getAttr<InitPriorityAttr>()) {
    OrderGlobalInits Key(IPA->getPriority(), PrioritizedCXXGlobalInits.size());
    PrioritizedCXXGlobalInits.push_back(std::make_pair(Key, Fn));
  } else if (isTemplateInstantiation(D->getTemplateSpecializationKind()) ||
             getContext().GetGVALinkageForVariable(D) == GVA_DiscardableODR) {
    // C++ [basic.start.init]p2:
    //   Definitions of explicitly specialized class template static data
    //   members have ordered initialization. Other class template static data
    //   members (i.e., implicitly or explicitly instantiated specializations)
    //   have unordered initialization.
    //
    // As a consequence, we can put them into their own llvm.global_ctors entry.
    //
    // If the global is externally visible, put the initializer into a COMDAT
    // group with the global being initialized. On most platforms, this is a
    // minor startup time optimization. In the MS C++ ABI, there are no guard
    // variables, so this COMDAT key is required for correctness.
    AddGlobalCtor(Fn, 65535, COMDATKey);
    if (getTarget().getCXXABI().isMicrosoft() && COMDATKey) {
      // In the MS C++ ABI, MS adds template static data members to the linker
      // directive.
      addUsedGlobal(COMDATKey);
    }
  } else if (D->hasAttr<SelectAnyAttr>()) {
    // SelectAny globals will be comdat-folded.
Put the initializer into a // COMDAT group associated with the global, so the initializers get folded // too. AddGlobalCtor(Fn, 65535, COMDATKey); } else { I = DelayedCXXInitPosition.find(D); // Re-do lookup in case of re-hash. if (I == DelayedCXXInitPosition.end()) { CXXGlobalInits.push_back(Fn); } else if (I->second != ~0U) { assert(I->second < CXXGlobalInits.size() && CXXGlobalInits[I->second] == nullptr); CXXGlobalInits[I->second] = Fn; } } // Remember that we already emitted the initializer for this global. DelayedCXXInitPosition[D] = ~0U; } void CodeGenModule::EmitCXXThreadLocalInitFunc() { getCXXABI().EmitThreadLocalInitFuncs( *this, CXXThreadLocals, CXXThreadLocalInits, CXXThreadLocalInitVars); CXXThreadLocalInits.clear(); CXXThreadLocalInitVars.clear(); CXXThreadLocals.clear(); } static SmallString<128> getTransformedFileName(llvm::Module &M) { SmallString<128> FileName = llvm::sys::path::filename(M.getName()); if (FileName.empty()) FileName = ""; for (size_t i = 0; i < FileName.size(); ++i) { // Replace everything that's not [a-zA-Z0-9._] with a _. This set happens // to be the set of C preprocessing numbers. if (!isPreprocessingNumberBody(FileName[i])) FileName[i] = '_'; } return FileName; } void CodeGenModule::EmitCXXGlobalInitFunc() { while (!CXXGlobalInits.empty() && !CXXGlobalInits.back()) CXXGlobalInits.pop_back(); if (CXXGlobalInits.empty() && PrioritizedCXXGlobalInits.empty()) return; llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); const CGFunctionInfo &FI = getTypes().arrangeNullaryFunction(); const bool UseSinitAndSterm = getCXXABI().useSinitAndSterm(); // Create our global prioritized initialization function. if (!PrioritizedCXXGlobalInits.empty()) { assert(!UseSinitAndSterm && "Prioritized sinit and sterm functions are not" " supported yet."); SmallVector LocalCXXGlobalInits; llvm::array_pod_sort(PrioritizedCXXGlobalInits.begin(), PrioritizedCXXGlobalInits.end()); // Iterate over "chunks" of ctors with same priority and emit each chunk // into separate function. Note - everything is sorted first by priority, // second - by lex order, so we emit ctor functions in proper order. for (SmallVectorImpl::iterator I = PrioritizedCXXGlobalInits.begin(), E = PrioritizedCXXGlobalInits.end(); I != E; ) { SmallVectorImpl::iterator PrioE = std::upper_bound(I + 1, E, *I, GlobalInitPriorityCmp()); LocalCXXGlobalInits.clear(); unsigned Priority = I->first.priority; // Compute the function suffix from priority. Prepend with zeroes to make // sure the function names are also ordered as priorities. std::string PrioritySuffix = llvm::utostr(Priority); // Priority is always <= 65535 (enforced by sema). PrioritySuffix = std::string(6-PrioritySuffix.size(), '0')+PrioritySuffix; llvm::Function *Fn = CreateGlobalInitOrCleanUpFunction( FTy, "_GLOBAL__I_" + PrioritySuffix, FI); for (; I < PrioE; ++I) LocalCXXGlobalInits.push_back(I->second); CodeGenFunction(*this).GenerateCXXGlobalInitFunc(Fn, LocalCXXGlobalInits); AddGlobalCtor(Fn, Priority); } PrioritizedCXXGlobalInits.clear(); } if (UseSinitAndSterm && CXXGlobalInits.empty()) return; // Include the filename in the symbol name. Including "sub_" matches gcc // and makes sure these symbols appear lexicographically behind the symbols // with priority emitted above. 
llvm::Function *Fn = CreateGlobalInitOrCleanUpFunction( FTy, llvm::Twine("_GLOBAL__sub_I_", getTransformedFileName(getModule())), FI); CodeGenFunction(*this).GenerateCXXGlobalInitFunc(Fn, CXXGlobalInits); AddGlobalCtor(Fn); // In OpenCL global init functions must be converted to kernels in order to // be able to launch them from the host. // FIXME: Some more work might be needed to handle destructors correctly. // Current initialization function makes use of function pointers callbacks. // We can't support function pointers especially between host and device. // However it seems global destruction has little meaning without any // dynamic resource allocation on the device and program scope variables are // destroyed by the runtime when program is released. if (getLangOpts().OpenCL) { GenOpenCLArgMetadata(Fn); Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL); } if (getLangOpts().HIP) { Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); Fn->addFnAttr("device-init"); } CXXGlobalInits.clear(); } void CodeGenModule::EmitCXXGlobalCleanUpFunc() { if (CXXGlobalDtorsOrStermFinalizers.empty()) return; llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); const CGFunctionInfo &FI = getTypes().arrangeNullaryFunction(); // Create our global cleanup function. llvm::Function *Fn = CreateGlobalInitOrCleanUpFunction(FTy, "_GLOBAL__D_a", FI); CodeGenFunction(*this).GenerateCXXGlobalCleanUpFunc( Fn, CXXGlobalDtorsOrStermFinalizers); AddGlobalDtor(Fn); CXXGlobalDtorsOrStermFinalizers.clear(); } /// Emit the code necessary to initialize the given global variable. void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn, const VarDecl *D, llvm::GlobalVariable *Addr, bool PerformInit) { // Check if we need to emit debug info for variable initializer. if (D->hasAttr()) DebugInfo = nullptr; // disable debug info indefinitely for this function CurEHLocation = D->getBeginLoc(); StartFunction(GlobalDecl(D, DynamicInitKind::Initializer), getContext().VoidTy, Fn, getTypes().arrangeNullaryFunction(), FunctionArgList(), D->getLocation(), D->getInit()->getExprLoc()); // Use guarded initialization if the global variable is weak. This // occurs for, e.g., instantiated static data members and // definitions explicitly marked weak. // // Also use guarded initialization for a variable with dynamic TLS and // unordered initialization. (If the initialization is ordered, the ABI // layer will guard the whole-TU initialization for us.) if (Addr->hasWeakLinkage() || Addr->hasLinkOnceLinkage() || (D->getTLSKind() == VarDecl::TLS_Dynamic && isTemplateInstantiation(D->getTemplateSpecializationKind()))) { EmitCXXGuardedInit(*D, Addr, PerformInit); } else { EmitCXXGlobalVarDeclInit(*D, Addr, PerformInit); } FinishFunction(); } void CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, ArrayRef Decls, ConstantAddress Guard) { { auto NL = ApplyDebugLocation::CreateEmpty(*this); StartFunction(GlobalDecl(), getContext().VoidTy, Fn, getTypes().arrangeNullaryFunction(), FunctionArgList()); // Emit an artificial location for this function. auto AL = ApplyDebugLocation::CreateArtificial(*this); llvm::BasicBlock *ExitBlock = nullptr; if (Guard.isValid()) { // If we have a guard variable, check whether we've already performed // these initializations. This happens for TLS initialization functions. 
llvm::Value *GuardVal = Builder.CreateLoad(Guard); llvm::Value *Uninit = Builder.CreateIsNull(GuardVal, "guard.uninitialized"); llvm::BasicBlock *InitBlock = createBasicBlock("init"); ExitBlock = createBasicBlock("exit"); EmitCXXGuardedInitBranch(Uninit, InitBlock, ExitBlock, GuardKind::TlsGuard, nullptr); EmitBlock(InitBlock); // Mark as initialized before initializing anything else. If the // initializers use previously-initialized thread_local vars, that's // probably supposed to be OK, but the standard doesn't say. Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(),1), Guard); // The guard variable can't ever change again. EmitInvariantStart( Guard.getPointer(), CharUnits::fromQuantity( CGM.getDataLayout().getTypeAllocSize(GuardVal->getType()))); } RunCleanupsScope Scope(*this); // When building in Objective-C++ ARC mode, create an autorelease pool // around the global initializers. if (getLangOpts().ObjCAutoRefCount && getLangOpts().CPlusPlus) { llvm::Value *token = EmitObjCAutoreleasePoolPush(); EmitObjCAutoreleasePoolCleanup(token); } for (unsigned i = 0, e = Decls.size(); i != e; ++i) if (Decls[i]) EmitRuntimeCall(Decls[i]); Scope.ForceCleanup(); if (ExitBlock) { Builder.CreateBr(ExitBlock); EmitBlock(ExitBlock); } } FinishFunction(); } void CodeGenFunction::GenerateCXXGlobalCleanUpFunc( llvm::Function *Fn, const std::vector> &DtorsOrStermFinalizers) { { auto NL = ApplyDebugLocation::CreateEmpty(*this); StartFunction(GlobalDecl(), getContext().VoidTy, Fn, getTypes().arrangeNullaryFunction(), FunctionArgList()); // Emit an artificial location for this function. auto AL = ApplyDebugLocation::CreateArtificial(*this); // Emit the cleanups, in reverse order from construction. for (unsigned i = 0, e = DtorsOrStermFinalizers.size(); i != e; ++i) { llvm::FunctionType *CalleeTy; llvm::Value *Callee; llvm::Constant *Arg; std::tie(CalleeTy, Callee, Arg) = DtorsOrStermFinalizers[e - i - 1]; llvm::CallInst *CI = nullptr; if (Arg == nullptr) { assert( CGM.getCXXABI().useSinitAndSterm() && "Arg could not be nullptr unless using sinit and sterm functions."); CI = Builder.CreateCall(CalleeTy, Callee); } else CI = Builder.CreateCall(CalleeTy, Callee, Arg); // Make sure the call and the callee agree on calling convention. if (llvm::Function *F = dyn_cast(Callee)) CI->setCallingConv(F->getCallingConv()); } } FinishFunction(); } /// generateDestroyHelper - Generates a helper function which, when /// invoked, destroys the given object. The address of the object /// should be in global memory. 
llvm::Function *CodeGenFunction::generateDestroyHelper( Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray, const VarDecl *VD) { FunctionArgList args; ImplicitParamDecl Dst(getContext(), getContext().VoidPtrTy, ImplicitParamDecl::Other); args.push_back(&Dst); const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(getContext().VoidTy, args); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); llvm::Function *fn = CGM.CreateGlobalInitOrCleanUpFunction( FTy, "__cxx_global_array_dtor", FI, VD->getLocation()); CurEHLocation = VD->getBeginLoc(); StartFunction(VD, getContext().VoidTy, fn, FI, args); emitDestroy(addr, type, destroyer, useEHCleanupForArray); FinishFunction(); return fn; } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 6a77f6b040a1..c3457865c0b0 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1,6117 +1,6134 @@ //===--- CodeGenModule.cpp - Emit LLVM Code from ASTs for a Module --------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This coordinates the per-module state used while generating code. // //===----------------------------------------------------------------------===// #include "CodeGenModule.h" #include "CGBlocks.h" #include "CGCUDARuntime.h" #include "CGCXXABI.h" #include "CGCall.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" #include "CGOpenMPRuntime.h" #include "CGOpenMPRuntimeAMDGCN.h" #include "CGOpenMPRuntimeNVPTX.h" #include "CodeGenFunction.h" #include "CodeGenPGO.h" #include "ConstantEmitter.h" #include "CoverageMappingGen.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/Mangle.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/Module.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Version.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MD5.h" #include "llvm/Support/TimeProfiler.h" using namespace clang; using namespace CodeGen; static llvm::cl::opt LimitedCoverage( "limited-coverage-experimental", llvm::cl::ZeroOrMore, llvm::cl::Hidden, llvm::cl::desc("Emit limited coverage mapping information (experimental)"), llvm::cl::init(false)); static const char 
AnnotationSection[] = "llvm.metadata"; static CGCXXABI *createCXXABI(CodeGenModule &CGM) { switch (CGM.getTarget().getCXXABI().getKind()) { case TargetCXXABI::Fuchsia: case TargetCXXABI::GenericAArch64: case TargetCXXABI::GenericARM: case TargetCXXABI::iOS: case TargetCXXABI::iOS64: case TargetCXXABI::WatchOS: case TargetCXXABI::GenericMIPS: case TargetCXXABI::GenericItanium: case TargetCXXABI::WebAssembly: case TargetCXXABI::XL: return CreateItaniumCXXABI(CGM); case TargetCXXABI::Microsoft: return CreateMicrosoftCXXABI(CGM); } llvm_unreachable("invalid C++ ABI kind"); } CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, const PreprocessorOptions &PPO, const CodeGenOptions &CGO, llvm::Module &M, DiagnosticsEngine &diags, CoverageSourceInfo *CoverageInfo) : Context(C), LangOpts(C.getLangOpts()), HeaderSearchOpts(HSO), PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags), Target(C.getTargetInfo()), ABI(createCXXABI(*this)), VMContext(M.getContext()), Types(*this), VTables(*this), SanitizerMD(new SanitizerMetadata(*this)) { // Initialize the type cache. llvm::LLVMContext &LLVMContext = M.getContext(); VoidTy = llvm::Type::getVoidTy(LLVMContext); Int8Ty = llvm::Type::getInt8Ty(LLVMContext); Int16Ty = llvm::Type::getInt16Ty(LLVMContext); Int32Ty = llvm::Type::getInt32Ty(LLVMContext); Int64Ty = llvm::Type::getInt64Ty(LLVMContext); HalfTy = llvm::Type::getHalfTy(LLVMContext); BFloatTy = llvm::Type::getBFloatTy(LLVMContext); FloatTy = llvm::Type::getFloatTy(LLVMContext); DoubleTy = llvm::Type::getDoubleTy(LLVMContext); PointerWidthInBits = C.getTargetInfo().getPointerWidth(0); PointerAlignInBytes = C.toCharUnitsFromBits(C.getTargetInfo().getPointerAlign(0)).getQuantity(); SizeSizeInBytes = C.toCharUnitsFromBits(C.getTargetInfo().getMaxPointerWidth()).getQuantity(); IntAlignInBytes = C.toCharUnitsFromBits(C.getTargetInfo().getIntAlign()).getQuantity(); IntTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getIntWidth()); IntPtrTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getMaxPointerWidth()); Int8PtrTy = Int8Ty->getPointerTo(0); Int8PtrPtrTy = Int8PtrTy->getPointerTo(0); AllocaInt8PtrTy = Int8Ty->getPointerTo( M.getDataLayout().getAllocaAddrSpace()); ASTAllocaAddressSpace = getTargetCodeGenInfo().getASTAllocaAddressSpace(); RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC(); if (LangOpts.ObjC) createObjCRuntime(); if (LangOpts.OpenCL) createOpenCLRuntime(); if (LangOpts.OpenMP) createOpenMPRuntime(); if (LangOpts.CUDA) createCUDARuntime(); // Enable TBAA unless it's suppressed. ThreadSanitizer needs TBAA even at O0. if (LangOpts.Sanitize.has(SanitizerKind::Thread) || (!CodeGenOpts.RelaxedAliasing && CodeGenOpts.OptimizationLevel > 0)) TBAA.reset(new CodeGenTBAA(Context, TheModule, CodeGenOpts, getLangOpts(), getCXXABI().getMangleContext())); // If debug info or coverage generation is enabled, create the CGDebugInfo // object. 
if (CodeGenOpts.getDebugInfo() != codegenoptions::NoDebugInfo || CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes) DebugInfo.reset(new CGDebugInfo(*this)); Block.GlobalUniqueCount = 0; if (C.getLangOpts().ObjC) ObjCData.reset(new ObjCEntrypoints()); if (CodeGenOpts.hasProfileClangUse()) { auto ReaderOrErr = llvm::IndexedInstrProfReader::create( CodeGenOpts.ProfileInstrumentUsePath, CodeGenOpts.ProfileRemappingFile); if (auto E = ReaderOrErr.takeError()) { unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "Could not read profile %0: %1"); llvm::handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EI) { getDiags().Report(DiagID) << CodeGenOpts.ProfileInstrumentUsePath << EI.message(); }); } else PGOReader = std::move(ReaderOrErr.get()); } // If coverage mapping generation is enabled, create the // CoverageMappingModuleGen object. if (CodeGenOpts.CoverageMapping) CoverageMapping.reset(new CoverageMappingModuleGen(*this, *CoverageInfo)); } CodeGenModule::~CodeGenModule() {} void CodeGenModule::createObjCRuntime() { // This is just isGNUFamily(), but we want to force implementors of // new ABIs to decide how best to do this. switch (LangOpts.ObjCRuntime.getKind()) { case ObjCRuntime::GNUstep: case ObjCRuntime::GCC: case ObjCRuntime::ObjFW: ObjCRuntime.reset(CreateGNUObjCRuntime(*this)); return; case ObjCRuntime::FragileMacOSX: case ObjCRuntime::MacOSX: case ObjCRuntime::iOS: case ObjCRuntime::WatchOS: ObjCRuntime.reset(CreateMacObjCRuntime(*this)); return; } llvm_unreachable("bad runtime kind"); } void CodeGenModule::createOpenCLRuntime() { OpenCLRuntime.reset(new CGOpenCLRuntime(*this)); } void CodeGenModule::createOpenMPRuntime() { // Select a specialized code generation class based on the target, if any. // If it does not exist use the default implementation. switch (getTriple().getArch()) { case llvm::Triple::nvptx: case llvm::Triple::nvptx64: assert(getLangOpts().OpenMPIsDevice && "OpenMP NVPTX is only prepared to deal with device code."); OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this)); break; case llvm::Triple::amdgcn: assert(getLangOpts().OpenMPIsDevice && "OpenMP AMDGCN is only prepared to deal with device code."); OpenMPRuntime.reset(new CGOpenMPRuntimeAMDGCN(*this)); break; default: if (LangOpts.OpenMPSimd) OpenMPRuntime.reset(new CGOpenMPSIMDRuntime(*this)); else OpenMPRuntime.reset(new CGOpenMPRuntime(*this)); break; } } void CodeGenModule::createCUDARuntime() { CUDARuntime.reset(CreateNVCUDARuntime(*this)); } void CodeGenModule::addReplacement(StringRef Name, llvm::Constant *C) { Replacements[Name] = C; } void CodeGenModule::applyReplacements() { for (auto &I : Replacements) { StringRef MangledName = I.first(); llvm::Constant *Replacement = I.second; llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (!Entry) continue; auto *OldF = cast(Entry); auto *NewF = dyn_cast(Replacement); if (!NewF) { if (auto *Alias = dyn_cast(Replacement)) { NewF = dyn_cast(Alias->getAliasee()); } else { auto *CE = cast(Replacement); assert(CE->getOpcode() == llvm::Instruction::BitCast || CE->getOpcode() == llvm::Instruction::GetElementPtr); NewF = dyn_cast(CE->getOperand(0)); } } // Replace old with new, but keep the old order. 
OldF->replaceAllUsesWith(Replacement); if (NewF) { NewF->removeFromParent(); OldF->getParent()->getFunctionList().insertAfter(OldF->getIterator(), NewF); } OldF->eraseFromParent(); } } void CodeGenModule::addGlobalValReplacement(llvm::GlobalValue *GV, llvm::Constant *C) { GlobalValReplacements.push_back(std::make_pair(GV, C)); } void CodeGenModule::applyGlobalValReplacements() { for (auto &I : GlobalValReplacements) { llvm::GlobalValue *GV = I.first; llvm::Constant *C = I.second; GV->replaceAllUsesWith(C); GV->eraseFromParent(); } } // This is only used in aliases that we created and we know they have a // linear structure. static const llvm::GlobalObject *getAliasedGlobal( const llvm::GlobalIndirectSymbol &GIS) { llvm::SmallPtrSet Visited; const llvm::Constant *C = &GIS; for (;;) { C = C->stripPointerCasts(); if (auto *GO = dyn_cast(C)) return GO; // stripPointerCasts will not walk over weak aliases. auto *GIS2 = dyn_cast(C); if (!GIS2) return nullptr; if (!Visited.insert(GIS2).second) return nullptr; C = GIS2->getIndirectSymbol(); } } void CodeGenModule::checkAliases() { // Check if the constructed aliases are well formed. It is really unfortunate // that we have to do this in CodeGen, but we only construct mangled names // and aliases during codegen. bool Error = false; DiagnosticsEngine &Diags = getDiags(); for (const GlobalDecl &GD : Aliases) { const auto *D = cast(GD.getDecl()); SourceLocation Location; bool IsIFunc = D->hasAttr(); if (const Attr *A = D->getDefiningAttr()) Location = A->getLocation(); else llvm_unreachable("Not an alias or ifunc?"); StringRef MangledName = getMangledName(GD); llvm::GlobalValue *Entry = GetGlobalValue(MangledName); auto *Alias = cast(Entry); const llvm::GlobalValue *GV = getAliasedGlobal(*Alias); if (!GV) { Error = true; Diags.Report(Location, diag::err_cyclic_alias) << IsIFunc; } else if (GV->isDeclaration()) { Error = true; Diags.Report(Location, diag::err_alias_to_undefined) << IsIFunc << IsIFunc; } else if (IsIFunc) { // Check resolver function type. llvm::FunctionType *FTy = dyn_cast( GV->getType()->getPointerElementType()); assert(FTy); if (!FTy->getReturnType()->isPointerTy()) Diags.Report(Location, diag::err_ifunc_resolver_return); } llvm::Constant *Aliasee = Alias->getIndirectSymbol(); llvm::GlobalValue *AliaseeGV; if (auto CE = dyn_cast(Aliasee)) AliaseeGV = cast(CE->getOperand(0)); else AliaseeGV = cast(Aliasee); if (const SectionAttr *SA = D->getAttr()) { StringRef AliasSection = SA->getName(); if (AliasSection != AliaseeGV->getSection()) Diags.Report(SA->getLocation(), diag::warn_alias_with_section) << AliasSection << IsIFunc << IsIFunc; } // We have to handle alias to weak aliases in here. LLVM itself disallows // this since the object semantics would not match the IL one. For // compatibility with gcc we implement it by just pointing the alias // to its aliasee's aliasee. We also warn, since the user is probably // expecting the link to be weak. 
if (auto GA = dyn_cast(AliaseeGV)) { if (GA->isInterposable()) { Diags.Report(Location, diag::warn_alias_to_weak_alias) << GV->getName() << GA->getName() << IsIFunc; Aliasee = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( GA->getIndirectSymbol(), Alias->getType()); Alias->setIndirectSymbol(Aliasee); } } } if (!Error) return; for (const GlobalDecl &GD : Aliases) { StringRef MangledName = getMangledName(GD); llvm::GlobalValue *Entry = GetGlobalValue(MangledName); auto *Alias = dyn_cast(Entry); Alias->replaceAllUsesWith(llvm::UndefValue::get(Alias->getType())); Alias->eraseFromParent(); } } void CodeGenModule::clear() { DeferredDeclsToEmit.clear(); if (OpenMPRuntime) OpenMPRuntime->clear(); } void InstrProfStats::reportDiagnostics(DiagnosticsEngine &Diags, StringRef MainFile) { if (!hasDiagnostics()) return; if (VisitedInMainFile > 0 && VisitedInMainFile == MissingInMainFile) { if (MainFile.empty()) MainFile = ""; Diags.Report(diag::warn_profile_data_unprofiled) << MainFile; } else { if (Mismatched > 0) Diags.Report(diag::warn_profile_data_out_of_date) << Visited << Mismatched; if (Missing > 0) Diags.Report(diag::warn_profile_data_missing) << Visited << Missing; } } void CodeGenModule::Release() { EmitDeferred(); EmitVTablesOpportunistically(); applyGlobalValReplacements(); applyReplacements(); checkAliases(); emitMultiVersionFunctions(); EmitCXXGlobalInitFunc(); EmitCXXGlobalCleanUpFunc(); registerGlobalDtorsWithAtExit(); EmitCXXThreadLocalInitFunc(); if (ObjCRuntime) if (llvm::Function *ObjCInitFunction = ObjCRuntime->ModuleInitFunction()) AddGlobalCtor(ObjCInitFunction); if (Context.getLangOpts().CUDA && !Context.getLangOpts().CUDAIsDevice && CUDARuntime) { if (llvm::Function *CudaCtorFunction = CUDARuntime->makeModuleCtorFunction()) AddGlobalCtor(CudaCtorFunction); } if (OpenMPRuntime) { if (llvm::Function *OpenMPRequiresDirectiveRegFun = OpenMPRuntime->emitRequiresDirectiveRegFun()) { AddGlobalCtor(OpenMPRequiresDirectiveRegFun, 0); } OpenMPRuntime->createOffloadEntriesAndInfoMetadata(); OpenMPRuntime->clear(); } if (PGOReader) { getModule().setProfileSummary( PGOReader->getSummary(/* UseCS */ false).getMD(VMContext), llvm::ProfileSummary::PSK_Instr); if (PGOStats.hasDiagnostics()) PGOStats.reportDiagnostics(getDiags(), getCodeGenOpts().MainFileName); } EmitCtorList(GlobalCtors, "llvm.global_ctors"); EmitCtorList(GlobalDtors, "llvm.global_dtors"); EmitGlobalAnnotations(); EmitStaticExternCAliases(); EmitDeferredUnusedCoverageMappings(); if (CoverageMapping) CoverageMapping->emit(); if (CodeGenOpts.SanitizeCfiCrossDso) { CodeGenFunction(*this).EmitCfiCheckFail(); CodeGenFunction(*this).EmitCfiCheckStub(); } emitAtAvailableLinkGuard(); if (Context.getTargetInfo().getTriple().isWasm() && !Context.getTargetInfo().getTriple().isOSEmscripten()) { EmitMainVoidAlias(); } emitLLVMUsed(); if (SanStats) SanStats->finish(); if (CodeGenOpts.Autolink && (Context.getLangOpts().Modules || !LinkerOptionsMetadata.empty())) { EmitModuleLinkOptions(); } // On ELF we pass the dependent library specifiers directly to the linker // without manipulating them. This is in contrast to other platforms where // they are mapped to a specific linker option by the compiler. This // difference is a result of the greater variety of ELF linkers and the fact // that ELF linkers tend to handle libraries in a more complicated fashion // than on other platforms. This forces us to defer handling the dependent // libs to the linker. // // CUDA/HIP device and host libraries are different. 
Currently there is no // way to differentiate dependent libraries for host or device. Existing // usage of #pragma comment(lib, *) is intended for host libraries on // Windows. Therefore emit llvm.dependent-libraries only for host. if (!ELFDependentLibraries.empty() && !Context.getLangOpts().CUDAIsDevice) { auto *NMD = getModule().getOrInsertNamedMetadata("llvm.dependent-libraries"); for (auto *MD : ELFDependentLibraries) NMD->addOperand(MD); } // Record mregparm value now so it is visible through rest of codegen. if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86) getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters", CodeGenOpts.NumRegisterParameters); if (CodeGenOpts.DwarfVersion) { getModule().addModuleFlag(llvm::Module::Max, "Dwarf Version", CodeGenOpts.DwarfVersion); } if (Context.getLangOpts().SemanticInterposition) // Require various optimization to respect semantic interposition. getModule().setSemanticInterposition(1); else if (Context.getLangOpts().ExplicitNoSemanticInterposition) // Allow dso_local on applicable targets. getModule().setSemanticInterposition(0); if (CodeGenOpts.EmitCodeView) { // Indicate that we want CodeView in the metadata. getModule().addModuleFlag(llvm::Module::Warning, "CodeView", 1); } if (CodeGenOpts.CodeViewGHash) { getModule().addModuleFlag(llvm::Module::Warning, "CodeViewGHash", 1); } if (CodeGenOpts.ControlFlowGuard) { // Function ID tables and checks for Control Flow Guard (cfguard=2). getModule().addModuleFlag(llvm::Module::Warning, "cfguard", 2); } else if (CodeGenOpts.ControlFlowGuardNoChecks) { // Function ID tables for Control Flow Guard (cfguard=1). getModule().addModuleFlag(llvm::Module::Warning, "cfguard", 1); } if (CodeGenOpts.OptimizationLevel > 0 && CodeGenOpts.StrictVTablePointers) { // We don't support LTO with 2 with different StrictVTablePointers // FIXME: we could support it by stripping all the information introduced // by StrictVTablePointers. getModule().addModuleFlag(llvm::Module::Error, "StrictVTablePointers",1); llvm::Metadata *Ops[2] = { llvm::MDString::get(VMContext, "StrictVTablePointers"), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( llvm::Type::getInt32Ty(VMContext), 1))}; getModule().addModuleFlag(llvm::Module::Require, "StrictVTablePointersRequirement", llvm::MDNode::get(VMContext, Ops)); } if (getModuleDebugInfo()) // We support a single version in the linked module. The LLVM // parser will drop debug info with a different version number // (and warn about it, too). getModule().addModuleFlag(llvm::Module::Warning, "Debug Info Version", llvm::DEBUG_METADATA_VERSION); // We need to record the widths of enums and wchar_t, so that we can generate // the correct build attributes in the ARM backend. wchar_size is also used by // TargetLibraryInfo. uint64_t WCharWidth = Context.getTypeSizeInChars(Context.getWideCharType()).getQuantity(); getModule().addModuleFlag(llvm::Module::Error, "wchar_size", WCharWidth); llvm::Triple::ArchType Arch = Context.getTargetInfo().getTriple().getArch(); if ( Arch == llvm::Triple::arm || Arch == llvm::Triple::armeb || Arch == llvm::Triple::thumb || Arch == llvm::Triple::thumbeb) { // The minimum width of an enum in bytes uint64_t EnumWidth = Context.getLangOpts().ShortEnums ? 
        1 : 4;
    getModule().addModuleFlag(llvm::Module::Error, "min_enum_size", EnumWidth);
  }

  if (Arch == llvm::Triple::riscv32 || Arch == llvm::Triple::riscv64) {
    StringRef ABIStr = Target.getABI();
    llvm::LLVMContext &Ctx = TheModule.getContext();
    getModule().addModuleFlag(llvm::Module::Error, "target-abi",
                              llvm::MDString::get(Ctx, ABIStr));
  }

  if (CodeGenOpts.SanitizeCfiCrossDso) {
    // Indicate that we want cross-DSO control flow integrity checks.
    getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1);
  }

  if (CodeGenOpts.WholeProgramVTables) {
    // Indicate whether VFE was enabled for this module, so that the
    // vcall_visibility metadata added under whole program vtables is handled
    // appropriately in the optimizer.
    getModule().addModuleFlag(llvm::Module::Error, "Virtual Function Elim",
                              CodeGenOpts.VirtualFunctionElimination);
  }

  if (LangOpts.Sanitize.has(SanitizerKind::CFIICall)) {
    getModule().addModuleFlag(llvm::Module::Override,
                              "CFI Canonical Jump Tables",
                              CodeGenOpts.SanitizeCfiCanonicalJumpTables);
  }

  if (CodeGenOpts.CFProtectionReturn &&
      Target.checkCFProtectionReturnSupported(getDiags())) {
    // Indicate that we want to instrument return control flow protection.
    getModule().addModuleFlag(llvm::Module::Override, "cf-protection-return",
                              1);
  }

  if (CodeGenOpts.CFProtectionBranch &&
      Target.checkCFProtectionBranchSupported(getDiags())) {
    // Indicate that we want to instrument branch control flow protection.
    getModule().addModuleFlag(llvm::Module::Override, "cf-protection-branch",
                              1);
  }

+  if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_32 ||
+      Arch == llvm::Triple::aarch64_be) {
+    getModule().addModuleFlag(llvm::Module::Error,
+                              "branch-target-enforcement",
+                              LangOpts.BranchTargetEnforcement);
+
+    getModule().addModuleFlag(llvm::Module::Error, "sign-return-address",
+                              LangOpts.hasSignReturnAddress());
+
+    getModule().addModuleFlag(llvm::Module::Error, "sign-return-address-all",
+                              LangOpts.isSignReturnAddressScopeAll());
+
+    getModule().addModuleFlag(llvm::Module::Error,
+                              "sign-return-address-with-bkey",
+                              !LangOpts.isSignReturnAddressWithAKey());
+  }
+
  if (LangOpts.CUDAIsDevice && getTriple().isNVPTX()) {
    // Indicate whether __nvvm_reflect should be configured to flush denormal
    // floating point values to 0. (This corresponds to its "__CUDA_FTZ"
    // property.)
    getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz",
                              CodeGenOpts.FP32DenormalMode.Output !=
                                  llvm::DenormalMode::IEEE);
  }

  // Emit OpenCL specific module metadata: OpenCL/SPIR version.
  if (LangOpts.OpenCL) {
    EmitOpenCLMetadata();
    // Emit SPIR version.
    if (getTriple().isSPIR()) {
      // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the
      // opencl.spir.version named metadata.
      // C++ is backwards compatible with OpenCL v2.0.
      auto Version = LangOpts.OpenCLCPlusPlus ? 200 : LangOpts.OpenCLVersion;
      llvm::Metadata *SPIRVerElts[] = {
          llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
              Int32Ty, Version / 100)),
          llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
              Int32Ty, (Version / 100 > 1) ?
0 : 2))}; llvm::NamedMDNode *SPIRVerMD = TheModule.getOrInsertNamedMetadata("opencl.spir.version"); llvm::LLVMContext &Ctx = TheModule.getContext(); SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts)); } } if (uint32_t PLevel = Context.getLangOpts().PICLevel) { assert(PLevel < 3 && "Invalid PIC Level"); getModule().setPICLevel(static_cast(PLevel)); if (Context.getLangOpts().PIE) getModule().setPIELevel(static_cast(PLevel)); } if (getCodeGenOpts().CodeModel.size() > 0) { unsigned CM = llvm::StringSwitch(getCodeGenOpts().CodeModel) .Case("tiny", llvm::CodeModel::Tiny) .Case("small", llvm::CodeModel::Small) .Case("kernel", llvm::CodeModel::Kernel) .Case("medium", llvm::CodeModel::Medium) .Case("large", llvm::CodeModel::Large) .Default(~0u); if (CM != ~0u) { llvm::CodeModel::Model codeModel = static_cast(CM); getModule().setCodeModel(codeModel); } } if (CodeGenOpts.NoPLT) getModule().setRtLibUseGOT(); SimplifyPersonality(); if (getCodeGenOpts().EmitDeclMetadata) EmitDeclMetadata(); if (getCodeGenOpts().EmitGcovArcs || getCodeGenOpts().EmitGcovNotes) EmitCoverageFile(); if (CGDebugInfo *DI = getModuleDebugInfo()) DI->finalize(); if (getCodeGenOpts().EmitVersionIdentMetadata) EmitVersionIdentMetadata(); if (!getCodeGenOpts().RecordCommandLine.empty()) EmitCommandLineMetadata(); getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames); EmitBackendOptionsMetadata(getCodeGenOpts()); } void CodeGenModule::EmitOpenCLMetadata() { // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the // opencl.ocl.version named metadata node. // C++ is backwards compatible with OpenCL v2.0. // FIXME: We might need to add CXX version at some point too? auto Version = LangOpts.OpenCLCPlusPlus ? 200 : LangOpts.OpenCLVersion; llvm::Metadata *OCLVerElts[] = { llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( Int32Ty, Version / 100)), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( Int32Ty, (Version % 100) / 10))}; llvm::NamedMDNode *OCLVerMD = TheModule.getOrInsertNamedMetadata("opencl.ocl.version"); llvm::LLVMContext &Ctx = TheModule.getContext(); OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts)); } void CodeGenModule::EmitBackendOptionsMetadata( const CodeGenOptions CodeGenOpts) { switch (getTriple().getArch()) { default: break; case llvm::Triple::riscv32: case llvm::Triple::riscv64: getModule().addModuleFlag(llvm::Module::Error, "SmallDataLimit", CodeGenOpts.SmallDataLimit); break; } } void CodeGenModule::UpdateCompletedType(const TagDecl *TD) { // Make sure that this type is translated. Types.UpdateCompletedType(TD); } void CodeGenModule::RefreshTypeCacheForClass(const CXXRecordDecl *RD) { // Make sure that this type is translated. Types.RefreshTypeCacheForClass(RD); } llvm::MDNode *CodeGenModule::getTBAATypeInfo(QualType QTy) { if (!TBAA) return nullptr; return TBAA->getTypeInfo(QTy); } TBAAAccessInfo CodeGenModule::getTBAAAccessInfo(QualType AccessType) { if (!TBAA) return TBAAAccessInfo(); if (getLangOpts().CUDAIsDevice) { // As CUDA builtin surface/texture types are replaced, skip generating TBAA // access info. 
if (AccessType->isCUDADeviceBuiltinSurfaceType()) { if (getTargetCodeGenInfo().getCUDADeviceBuiltinSurfaceDeviceType() != nullptr) return TBAAAccessInfo(); } else if (AccessType->isCUDADeviceBuiltinTextureType()) { if (getTargetCodeGenInfo().getCUDADeviceBuiltinTextureDeviceType() != nullptr) return TBAAAccessInfo(); } } return TBAA->getAccessInfo(AccessType); } TBAAAccessInfo CodeGenModule::getTBAAVTablePtrAccessInfo(llvm::Type *VTablePtrType) { if (!TBAA) return TBAAAccessInfo(); return TBAA->getVTablePtrAccessInfo(VTablePtrType); } llvm::MDNode *CodeGenModule::getTBAAStructInfo(QualType QTy) { if (!TBAA) return nullptr; return TBAA->getTBAAStructInfo(QTy); } llvm::MDNode *CodeGenModule::getTBAABaseTypeInfo(QualType QTy) { if (!TBAA) return nullptr; return TBAA->getBaseTypeInfo(QTy); } llvm::MDNode *CodeGenModule::getTBAAAccessTagInfo(TBAAAccessInfo Info) { if (!TBAA) return nullptr; return TBAA->getAccessTagInfo(Info); } TBAAAccessInfo CodeGenModule::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, TBAAAccessInfo TargetInfo) { if (!TBAA) return TBAAAccessInfo(); return TBAA->mergeTBAAInfoForCast(SourceInfo, TargetInfo); } TBAAAccessInfo CodeGenModule::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, TBAAAccessInfo InfoB) { if (!TBAA) return TBAAAccessInfo(); return TBAA->mergeTBAAInfoForConditionalOperator(InfoA, InfoB); } TBAAAccessInfo CodeGenModule::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo, TBAAAccessInfo SrcInfo) { if (!TBAA) return TBAAAccessInfo(); return TBAA->mergeTBAAInfoForConditionalOperator(DestInfo, SrcInfo); } void CodeGenModule::DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo) { if (llvm::MDNode *Tag = getTBAAAccessTagInfo(TBAAInfo)) Inst->setMetadata(llvm::LLVMContext::MD_tbaa, Tag); } void CodeGenModule::DecorateInstructionWithInvariantGroup( llvm::Instruction *I, const CXXRecordDecl *RD) { I->setMetadata(llvm::LLVMContext::MD_invariant_group, llvm::MDNode::get(getLLVMContext(), {})); } void CodeGenModule::Error(SourceLocation loc, StringRef message) { unsigned diagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, "%0"); getDiags().Report(Context.getFullLoc(loc), diagID) << message; } /// ErrorUnsupported - Print out an error that codegen doesn't support the /// specified stmt yet. void CodeGenModule::ErrorUnsupported(const Stmt *S, const char *Type) { unsigned DiagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, "cannot compile this %0 yet"); std::string Msg = Type; getDiags().Report(Context.getFullLoc(S->getBeginLoc()), DiagID) << Msg << S->getSourceRange(); } /// ErrorUnsupported - Print out an error that codegen doesn't support the /// specified decl yet. void CodeGenModule::ErrorUnsupported(const Decl *D, const char *Type) { unsigned DiagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, "cannot compile this %0 yet"); std::string Msg = Type; getDiags().Report(Context.getFullLoc(D->getLocation()), DiagID) << Msg; } llvm::ConstantInt *CodeGenModule::getSize(CharUnits size) { return llvm::ConstantInt::get(SizeTy, size.getQuantity()); } void CodeGenModule::setGlobalVisibility(llvm::GlobalValue *GV, const NamedDecl *D) const { if (GV->hasDLLImportStorageClass()) return; // Internal definitions always have default visibility. if (GV->hasLocalLinkage()) { GV->setVisibility(llvm::GlobalValue::DefaultVisibility); return; } if (!D) return; // Set visibility for definitions, and for declarations if requested globally // or set explicitly. 
LinkageInfo LV = D->getLinkageAndVisibility(); if (LV.isVisibilityExplicit() || getLangOpts().SetVisibilityForExternDecls || !GV->isDeclarationForLinker()) GV->setVisibility(GetLLVMVisibility(LV.getVisibility())); } static bool shouldAssumeDSOLocal(const CodeGenModule &CGM, llvm::GlobalValue *GV) { if (GV->hasLocalLinkage()) return true; if (!GV->hasDefaultVisibility() && !GV->hasExternalWeakLinkage()) return true; // DLLImport explicitly marks the GV as external. if (GV->hasDLLImportStorageClass()) return false; const llvm::Triple &TT = CGM.getTriple(); if (TT.isWindowsGNUEnvironment()) { // In MinGW, variables without DLLImport can still be automatically // imported from a DLL by the linker; don't mark variables that // potentially could come from another DLL as DSO local. if (GV->isDeclarationForLinker() && isa(GV) && !GV->isThreadLocal()) return false; } // On COFF, don't mark 'extern_weak' symbols as DSO local. If these symbols // remain unresolved in the link, they can be resolved to zero, which is // outside the current DSO. if (TT.isOSBinFormatCOFF() && GV->hasExternalWeakLinkage()) return false; // Every other GV is local on COFF. // Make an exception for windows OS in the triple: Some firmware builds use // *-win32-macho triples. This (accidentally?) produced windows relocations // without GOT tables in older clang versions; Keep this behaviour. // FIXME: even thread local variables? if (TT.isOSBinFormatCOFF() || (TT.isOSWindows() && TT.isOSBinFormatMachO())) return true; // Only handle COFF and ELF for now. if (!TT.isOSBinFormatELF()) return false; // If this is not an executable, don't assume anything is local. const auto &CGOpts = CGM.getCodeGenOpts(); llvm::Reloc::Model RM = CGOpts.RelocationModel; const auto &LOpts = CGM.getLangOpts(); if (RM != llvm::Reloc::Static && !LOpts.PIE) return false; // A definition cannot be preempted from an executable. if (!GV->isDeclarationForLinker()) return true; // Most PIC code sequences that assume that a symbol is local cannot produce a // 0 if it turns out the symbol is undefined. While this is ABI and relocation // depended, it seems worth it to handle it here. if (RM == llvm::Reloc::PIC_ && GV->hasExternalWeakLinkage()) return false; // PPC has no copy relocations and cannot use a plt entry as a symbol address. llvm::Triple::ArchType Arch = TT.getArch(); if (Arch == llvm::Triple::ppc || Arch == llvm::Triple::ppc64 || Arch == llvm::Triple::ppc64le) return false; // If we can use copy relocations we can assume it is local. if (auto *Var = dyn_cast(GV)) if (!Var->isThreadLocal() && (RM == llvm::Reloc::Static || CGOpts.PIECopyRelocations)) return true; // If we can use a plt entry as the symbol address we can assume it // is local. // FIXME: This should work for PIE, but the gold linker doesn't support it. if (isa(GV) && !CGOpts.NoPLT && RM == llvm::Reloc::Static) return true; // Otherwise don't assume it is local. return false; } void CodeGenModule::setDSOLocal(llvm::GlobalValue *GV) const { GV->setDSOLocal(shouldAssumeDSOLocal(*this, GV)); } void CodeGenModule::setDLLImportDLLExport(llvm::GlobalValue *GV, GlobalDecl GD) const { const auto *D = dyn_cast(GD.getDecl()); // C++ destructors have a few C++ ABI specific special cases. 
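  // (For example, the Microsoft C++ ABI decides which destructor variant
  // actually carries the dllimport/dllexport storage class, so the decision
  // is delegated to the ABI object here.)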
if (const auto *Dtor = dyn_cast_or_null(D)) { getCXXABI().setCXXDestructorDLLStorage(GV, Dtor, GD.getDtorType()); return; } setDLLImportDLLExport(GV, D); } void CodeGenModule::setDLLImportDLLExport(llvm::GlobalValue *GV, const NamedDecl *D) const { if (D && D->isExternallyVisible()) { if (D->hasAttr()) GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass); else if (D->hasAttr() && !GV->isDeclarationForLinker()) GV->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass); } } void CodeGenModule::setGVProperties(llvm::GlobalValue *GV, GlobalDecl GD) const { setDLLImportDLLExport(GV, GD); setGVPropertiesAux(GV, dyn_cast(GD.getDecl())); } void CodeGenModule::setGVProperties(llvm::GlobalValue *GV, const NamedDecl *D) const { setDLLImportDLLExport(GV, D); setGVPropertiesAux(GV, D); } void CodeGenModule::setGVPropertiesAux(llvm::GlobalValue *GV, const NamedDecl *D) const { setGlobalVisibility(GV, D); setDSOLocal(GV); GV->setPartition(CodeGenOpts.SymbolPartition); } static llvm::GlobalVariable::ThreadLocalMode GetLLVMTLSModel(StringRef S) { return llvm::StringSwitch(S) .Case("global-dynamic", llvm::GlobalVariable::GeneralDynamicTLSModel) .Case("local-dynamic", llvm::GlobalVariable::LocalDynamicTLSModel) .Case("initial-exec", llvm::GlobalVariable::InitialExecTLSModel) .Case("local-exec", llvm::GlobalVariable::LocalExecTLSModel); } llvm::GlobalVariable::ThreadLocalMode CodeGenModule::GetDefaultLLVMTLSModel() const { switch (CodeGenOpts.getDefaultTLSModel()) { case CodeGenOptions::GeneralDynamicTLSModel: return llvm::GlobalVariable::GeneralDynamicTLSModel; case CodeGenOptions::LocalDynamicTLSModel: return llvm::GlobalVariable::LocalDynamicTLSModel; case CodeGenOptions::InitialExecTLSModel: return llvm::GlobalVariable::InitialExecTLSModel; case CodeGenOptions::LocalExecTLSModel: return llvm::GlobalVariable::LocalExecTLSModel; } llvm_unreachable("Invalid TLS model!"); } void CodeGenModule::setTLSMode(llvm::GlobalValue *GV, const VarDecl &D) const { assert(D.getTLSKind() && "setting TLS mode on non-TLS var!"); llvm::GlobalValue::ThreadLocalMode TLM; TLM = GetDefaultLLVMTLSModel(); // Override the TLS model if it is explicitly specified. if (const TLSModelAttr *Attr = D.getAttr()) { TLM = GetLLVMTLSModel(Attr->getModel()); } GV->setThreadLocalMode(TLM); } static std::string getCPUSpecificMangling(const CodeGenModule &CGM, StringRef Name) { const TargetInfo &Target = CGM.getTarget(); return (Twine('.') + Twine(Target.CPUSpecificManglingCharacter(Name))).str(); } static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM, const CPUSpecificAttr *Attr, unsigned CPUIndex, raw_ostream &Out) { // cpu_specific gets the current name, dispatch gets the resolver if IFunc is // supported. if (Attr) Out << getCPUSpecificMangling(CGM, Attr->getCPUName(CPUIndex)->getName()); else if (CGM.getTarget().supportsIFunc()) Out << ".resolver"; } static void AppendTargetMangling(const CodeGenModule &CGM, const TargetAttr *Attr, raw_ostream &Out) { if (Attr->isDefaultVersion()) return; Out << '.'; const TargetInfo &Target = CGM.getTarget(); ParsedTargetAttr Info = Attr->parse([&Target](StringRef LHS, StringRef RHS) { // Multiversioning doesn't allow "no-${feature}", so we can // only have "+" prefixes here. 
assert(LHS.startswith("+") && RHS.startswith("+") && "Features should always have a prefix."); return Target.multiVersionSortPriority(LHS.substr(1)) > Target.multiVersionSortPriority(RHS.substr(1)); }); bool IsFirst = true; if (!Info.Architecture.empty()) { IsFirst = false; Out << "arch_" << Info.Architecture; } for (StringRef Feat : Info.Features) { if (!IsFirst) Out << '_'; IsFirst = false; Out << Feat.substr(1); } } static std::string getMangledNameImpl(const CodeGenModule &CGM, GlobalDecl GD, const NamedDecl *ND, bool OmitMultiVersionMangling = false) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); MangleContext &MC = CGM.getCXXABI().getMangleContext(); if (MC.shouldMangleDeclName(ND)) MC.mangleName(GD.getWithDecl(ND), Out); else { IdentifierInfo *II = ND->getIdentifier(); assert(II && "Attempt to mangle unnamed decl."); const auto *FD = dyn_cast(ND); if (FD && FD->getType()->castAs()->getCallConv() == CC_X86RegCall) { Out << "__regcall3__" << II->getName(); } else if (FD && FD->hasAttr() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { Out << "__device_stub__" << II->getName(); } else { Out << II->getName(); } } if (const auto *FD = dyn_cast(ND)) if (FD->isMultiVersion() && !OmitMultiVersionMangling) { switch (FD->getMultiVersionKind()) { case MultiVersionKind::CPUDispatch: case MultiVersionKind::CPUSpecific: AppendCPUSpecificCPUDispatchMangling(CGM, FD->getAttr(), GD.getMultiVersionIndex(), Out); break; case MultiVersionKind::Target: AppendTargetMangling(CGM, FD->getAttr(), Out); break; case MultiVersionKind::None: llvm_unreachable("None multiversion type isn't valid here"); } } return std::string(Out.str()); } void CodeGenModule::UpdateMultiVersionNames(GlobalDecl GD, const FunctionDecl *FD) { if (!FD->isMultiVersion()) return; // Get the name of what this would be without the 'target' attribute. This // allows us to lookup the version that was emitted when this wasn't a // multiversion function. std::string NonTargetName = getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true); GlobalDecl OtherGD; if (lookupRepresentativeDecl(NonTargetName, OtherGD)) { assert(OtherGD.getCanonicalDecl() .getDecl() ->getAsFunction() ->isMultiVersion() && "Other GD should now be a multiversioned function"); // OtherFD is the version of this function that was mangled BEFORE // becoming a MultiVersion function. It potentially needs to be updated. const FunctionDecl *OtherFD = OtherGD.getCanonicalDecl() .getDecl() ->getAsFunction() ->getMostRecentDecl(); std::string OtherName = getMangledNameImpl(*this, OtherGD, OtherFD); // This is so that if the initial version was already the 'default' // version, we don't try to update it. if (OtherName != NonTargetName) { // Remove instead of erase, since others may have stored the StringRef // to this. const auto ExistingRecord = Manglings.find(NonTargetName); if (ExistingRecord != std::end(Manglings)) Manglings.remove(&(*ExistingRecord)); auto Result = Manglings.insert(std::make_pair(OtherName, OtherGD)); MangledDeclNames[OtherGD.getCanonicalDecl()] = Result.first->first(); if (llvm::GlobalValue *Entry = GetGlobalValue(NonTargetName)) Entry->setName(OtherName); } } } StringRef CodeGenModule::getMangledName(GlobalDecl GD) { GlobalDecl CanonicalGD = GD.getCanonicalDecl(); // Some ABIs don't have constructor variants. Make sure that base and // complete constructors get mangled the same. 
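  // (Illustrative: on such ABIs GlobalDecl(CD, Ctor_Base) and
  // GlobalDecl(CD, Ctor_Complete) must resolve to a single symbol, so
  // Ctor_Base is canonicalized to Ctor_Complete below.)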
if (const auto *CD = dyn_cast(CanonicalGD.getDecl())) { if (!getTarget().getCXXABI().hasConstructorVariants()) { CXXCtorType OrigCtorType = GD.getCtorType(); assert(OrigCtorType == Ctor_Base || OrigCtorType == Ctor_Complete); if (OrigCtorType == Ctor_Base) CanonicalGD = GlobalDecl(CD, Ctor_Complete); } } auto FoundName = MangledDeclNames.find(CanonicalGD); if (FoundName != MangledDeclNames.end()) return FoundName->second; // Keep the first result in the case of a mangling collision. const auto *ND = cast(GD.getDecl()); std::string MangledName = getMangledNameImpl(*this, GD, ND); // Ensure either we have different ABIs between host and device compilations, // says host compilation following MSVC ABI but device compilation follows // Itanium C++ ABI or, if they follow the same ABI, kernel names after // mangling should be the same after name stubbing. The later checking is // very important as the device kernel name being mangled in host-compilation // is used to resolve the device binaries to be executed. Inconsistent naming // result in undefined behavior. Even though we cannot check that naming // directly between host- and device-compilations, the host- and // device-mangling in host compilation could help catching certain ones. assert(!isa(ND) || !ND->hasAttr() || getLangOpts().CUDAIsDevice || (getContext().getAuxTargetInfo() && (getContext().getAuxTargetInfo()->getCXXABI() != getContext().getTargetInfo().getCXXABI())) || getCUDARuntime().getDeviceSideName(ND) == getMangledNameImpl( *this, GD.getWithKernelReferenceKind(KernelReferenceKind::Kernel), ND)); auto Result = Manglings.insert(std::make_pair(MangledName, GD)); return MangledDeclNames[CanonicalGD] = Result.first->first(); } StringRef CodeGenModule::getBlockMangledName(GlobalDecl GD, const BlockDecl *BD) { MangleContext &MangleCtx = getCXXABI().getMangleContext(); const Decl *D = GD.getDecl(); SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); if (!D) MangleCtx.mangleGlobalBlock(BD, dyn_cast_or_null(initializedGlobalDecl.getDecl()), Out); else if (const auto *CD = dyn_cast(D)) MangleCtx.mangleCtorBlock(CD, GD.getCtorType(), BD, Out); else if (const auto *DD = dyn_cast(D)) MangleCtx.mangleDtorBlock(DD, GD.getDtorType(), BD, Out); else MangleCtx.mangleBlock(cast(D), BD, Out); auto Result = Manglings.insert(std::make_pair(Out.str(), BD)); return Result.first->first(); } llvm::GlobalValue *CodeGenModule::GetGlobalValue(StringRef Name) { return getModule().getNamedValue(Name); } /// AddGlobalCtor - Add a function to the list that will be called before /// main() runs. void CodeGenModule::AddGlobalCtor(llvm::Function *Ctor, int Priority, llvm::Constant *AssociatedData) { // FIXME: Type coercion of void()* types. GlobalCtors.push_back(Structor(Priority, Ctor, AssociatedData)); } /// AddGlobalDtor - Add a function to the list that will be called /// when the module is unloaded. void CodeGenModule::AddGlobalDtor(llvm::Function *Dtor, int Priority) { if (CodeGenOpts.RegisterGlobalDtorsWithAtExit) { if (getCXXABI().useSinitAndSterm()) llvm::report_fatal_error( "register global dtors with atexit() is not supported yet"); DtorsUsingAtExit[Priority].push_back(Dtor); return; } // FIXME: Type coercion of void()* types. GlobalDtors.push_back(Structor(Priority, Dtor, nullptr)); } void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) { if (Fns.empty()) return; // Ctor function type is void()*. 
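  // Each entry built below follows the llvm.global_ctors element layout
  // { i32 priority, void ()* initializer, i8* associated-data }, e.g.
  //   { i32 65535, void ()* @_GLOBAL__sub_I_foo.cpp, i8* null }
  // (the symbol name in the example is illustrative only).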
llvm::FunctionType* CtorFTy = llvm::FunctionType::get(VoidTy, false); llvm::Type *CtorPFTy = llvm::PointerType::get(CtorFTy, TheModule.getDataLayout().getProgramAddressSpace()); // Get the type of a ctor entry, { i32, void ()*, i8* }. llvm::StructType *CtorStructTy = llvm::StructType::get( Int32Ty, CtorPFTy, VoidPtrTy); // Construct the constructor and destructor arrays. ConstantInitBuilder builder(*this); auto ctors = builder.beginArray(CtorStructTy); for (const auto &I : Fns) { auto ctor = ctors.beginStruct(CtorStructTy); ctor.addInt(Int32Ty, I.Priority); ctor.add(llvm::ConstantExpr::getBitCast(I.Initializer, CtorPFTy)); if (I.AssociatedData) ctor.add(llvm::ConstantExpr::getBitCast(I.AssociatedData, VoidPtrTy)); else ctor.addNullPointer(VoidPtrTy); ctor.finishAndAddTo(ctors); } auto list = ctors.finishAndCreateGlobal(GlobalName, getPointerAlign(), /*constant*/ false, llvm::GlobalValue::AppendingLinkage); // The LTO linker doesn't seem to like it when we set an alignment // on appending variables. Take it off as a workaround. list->setAlignment(llvm::None); Fns.clear(); } llvm::GlobalValue::LinkageTypes CodeGenModule::getFunctionLinkage(GlobalDecl GD) { const auto *D = cast(GD.getDecl()); GVALinkage Linkage = getContext().GetGVALinkageForFunction(D); if (const auto *Dtor = dyn_cast(D)) return getCXXABI().getCXXDestructorLinkage(Linkage, Dtor, GD.getDtorType()); if (isa(D) && cast(D)->isInheritingConstructor() && Context.getTargetInfo().getCXXABI().isMicrosoft()) { // Our approach to inheriting constructors is fundamentally different from // that used by the MS ABI, so keep our inheriting constructor thunks // internal rather than trying to pick an unambiguous mangling for them. return llvm::GlobalValue::InternalLinkage; } return getLLVMLinkageForDeclarator(D, Linkage, /*IsConstantVariable=*/false); } llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) { llvm::MDString *MDS = dyn_cast(MD); if (!MDS) return nullptr; return llvm::ConstantInt::get(Int64Ty, llvm::MD5Hash(MDS->getString())); } void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F) { unsigned CallingConv; llvm::AttributeList PAL; ConstructAttributeList(F->getName(), Info, GD, PAL, CallingConv, false); F->setAttributes(PAL); F->setCallingConv(static_cast(CallingConv)); } static void removeImageAccessQualifier(std::string& TyName) { std::string ReadOnlyQual("__read_only"); std::string::size_type ReadOnlyPos = TyName.find(ReadOnlyQual); if (ReadOnlyPos != std::string::npos) // "+ 1" for the space after access qualifier. TyName.erase(ReadOnlyPos, ReadOnlyQual.size() + 1); else { std::string WriteOnlyQual("__write_only"); std::string::size_type WriteOnlyPos = TyName.find(WriteOnlyQual); if (WriteOnlyPos != std::string::npos) TyName.erase(WriteOnlyPos, WriteOnlyQual.size() + 1); else { std::string ReadWriteQual("__read_write"); std::string::size_type ReadWritePos = TyName.find(ReadWriteQual); if (ReadWritePos != std::string::npos) TyName.erase(ReadWritePos, ReadWriteQual.size() + 1); } } } // Returns the address space id that should be produced to the // kernel_arg_addr_space metadata. This is always fixed to the ids // as specified in the SPIR 2.0 specification in order to differentiate // for example in clGetKernelArgInfo() implementation between the address // spaces with targets without unique mapping to the OpenCL address spaces // (basically all single AS CPUs). 
static unsigned ArgInfoAddressSpace(LangAS AS) { switch (AS) { case LangAS::opencl_global: return 1; case LangAS::opencl_constant: return 2; case LangAS::opencl_local: return 3; case LangAS::opencl_generic: return 4; // Not in SPIR 2.0 specs. case LangAS::opencl_global_device: return 5; case LangAS::opencl_global_host: return 6; default: return 0; // Assume private. } } void CodeGenModule::GenOpenCLArgMetadata(llvm::Function *Fn, const FunctionDecl *FD, CodeGenFunction *CGF) { assert(((FD && CGF) || (!FD && !CGF)) && "Incorrect use - FD and CGF should either be both null or not!"); // Create MDNodes that represent the kernel arg metadata. // Each MDNode is a list in the form of "key", N number of values which is // the same number of values as their are kernel arguments. const PrintingPolicy &Policy = Context.getPrintingPolicy(); // MDNode for the kernel argument address space qualifiers. SmallVector addressQuals; // MDNode for the kernel argument access qualifiers (images only). SmallVector accessQuals; // MDNode for the kernel argument type names. SmallVector argTypeNames; // MDNode for the kernel argument base type names. SmallVector argBaseTypeNames; // MDNode for the kernel argument type qualifiers. SmallVector argTypeQuals; // MDNode for the kernel argument names. SmallVector argNames; if (FD && CGF) for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) { const ParmVarDecl *parm = FD->getParamDecl(i); QualType ty = parm->getType(); std::string typeQuals; if (ty->isPointerType()) { QualType pointeeTy = ty->getPointeeType(); // Get address qualifier. addressQuals.push_back( llvm::ConstantAsMetadata::get(CGF->Builder.getInt32( ArgInfoAddressSpace(pointeeTy.getAddressSpace())))); // Get argument type name. std::string typeName = pointeeTy.getUnqualifiedType().getAsString(Policy) + "*"; // Turn "unsigned type" to "utype" std::string::size_type pos = typeName.find("unsigned"); if (pointeeTy.isCanonical() && pos != std::string::npos) typeName.erase(pos + 1, 8); argTypeNames.push_back(llvm::MDString::get(VMContext, typeName)); std::string baseTypeName = pointeeTy.getUnqualifiedType().getCanonicalType().getAsString( Policy) + "*"; // Turn "unsigned type" to "utype" pos = baseTypeName.find("unsigned"); if (pos != std::string::npos) baseTypeName.erase(pos + 1, 8); argBaseTypeNames.push_back( llvm::MDString::get(VMContext, baseTypeName)); // Get argument type qualifiers: if (ty.isRestrictQualified()) typeQuals = "restrict"; if (pointeeTy.isConstQualified() || (pointeeTy.getAddressSpace() == LangAS::opencl_constant)) typeQuals += typeQuals.empty() ? "const" : " const"; if (pointeeTy.isVolatileQualified()) typeQuals += typeQuals.empty() ? "volatile" : " volatile"; } else { uint32_t AddrSpc = 0; bool isPipe = ty->isPipeType(); if (ty->isImageType() || isPipe) AddrSpc = ArgInfoAddressSpace(LangAS::opencl_global); addressQuals.push_back( llvm::ConstantAsMetadata::get(CGF->Builder.getInt32(AddrSpc))); // Get argument type name. 
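        // (Illustrative: a 'read_only image2d_t' parameter is recorded as
        // "image2d_t" once its access qualifier is stripped below, while a
        // pipe parameter records its element type instead.)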
std::string typeName; if (isPipe) typeName = ty.getCanonicalType() ->castAs() ->getElementType() .getAsString(Policy); else typeName = ty.getUnqualifiedType().getAsString(Policy); // Turn "unsigned type" to "utype" std::string::size_type pos = typeName.find("unsigned"); if (ty.isCanonical() && pos != std::string::npos) typeName.erase(pos + 1, 8); std::string baseTypeName; if (isPipe) baseTypeName = ty.getCanonicalType() ->castAs() ->getElementType() .getCanonicalType() .getAsString(Policy); else baseTypeName = ty.getUnqualifiedType().getCanonicalType().getAsString(Policy); // Remove access qualifiers on images // (as they are inseparable from type in clang implementation, // but OpenCL spec provides a special query to get access qualifier // via clGetKernelArgInfo with CL_KERNEL_ARG_ACCESS_QUALIFIER): if (ty->isImageType()) { removeImageAccessQualifier(typeName); removeImageAccessQualifier(baseTypeName); } argTypeNames.push_back(llvm::MDString::get(VMContext, typeName)); // Turn "unsigned type" to "utype" pos = baseTypeName.find("unsigned"); if (pos != std::string::npos) baseTypeName.erase(pos + 1, 8); argBaseTypeNames.push_back( llvm::MDString::get(VMContext, baseTypeName)); if (isPipe) typeQuals = "pipe"; } argTypeQuals.push_back(llvm::MDString::get(VMContext, typeQuals)); // Get image and pipe access qualifier: if (ty->isImageType() || ty->isPipeType()) { const Decl *PDecl = parm; if (auto *TD = dyn_cast(ty)) PDecl = TD->getDecl(); const OpenCLAccessAttr *A = PDecl->getAttr(); if (A && A->isWriteOnly()) accessQuals.push_back(llvm::MDString::get(VMContext, "write_only")); else if (A && A->isReadWrite()) accessQuals.push_back(llvm::MDString::get(VMContext, "read_write")); else accessQuals.push_back(llvm::MDString::get(VMContext, "read_only")); } else accessQuals.push_back(llvm::MDString::get(VMContext, "none")); // Get argument name. argNames.push_back(llvm::MDString::get(VMContext, parm->getName())); } Fn->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(VMContext, addressQuals)); Fn->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(VMContext, accessQuals)); Fn->setMetadata("kernel_arg_type", llvm::MDNode::get(VMContext, argTypeNames)); Fn->setMetadata("kernel_arg_base_type", llvm::MDNode::get(VMContext, argBaseTypeNames)); Fn->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(VMContext, argTypeQuals)); if (getCodeGenOpts().EmitOpenCLArgMetadata) Fn->setMetadata("kernel_arg_name", llvm::MDNode::get(VMContext, argNames)); } /// Determines whether the language options require us to model /// unwind exceptions. We treat -fexceptions as mandating this /// except under the fragile ObjC ABI with only ObjC exceptions /// enabled. This means, for example, that C with -fexceptions /// enables this. static bool hasUnwindExceptions(const LangOptions &LangOpts) { // If exceptions are completely disabled, obviously this is false. if (!LangOpts.Exceptions) return false; // If C++ exceptions are enabled, this is true. if (LangOpts.CXXExceptions) return true; // If ObjC exceptions are enabled, this depends on the ABI. if (LangOpts.ObjCExceptions) { return LangOpts.ObjCRuntime.hasUnwindExceptions(); } return true; } static bool requiresMemberFunctionPointerTypeMetadata(CodeGenModule &CGM, const CXXMethodDecl *MD) { // Check that the type metadata can ever actually be used by a call. if (!CGM.getCodeGenOpts().LTOUnit || !CGM.HasHiddenLTOVisibility(MD->getParent())) return false; // Only functions whose address can be taken with a member function pointer // need this sort of type metadata. 
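  // (i.e. non-static, non-virtual methods other than ctors/dtors: virtual
  // calls through member pointers are covered by the vtable checks, and
  // constructors/destructors cannot be the target of a member function
  // pointer.)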
return !MD->isStatic() && !MD->isVirtual() && !isa(MD) && !isa(MD); } std::vector CodeGenModule::getMostBaseClasses(const CXXRecordDecl *RD) { llvm::SetVector MostBases; std::function CollectMostBases; CollectMostBases = [&](const CXXRecordDecl *RD) { if (RD->getNumBases() == 0) MostBases.insert(RD); for (const CXXBaseSpecifier &B : RD->bases()) CollectMostBases(B.getType()->getAsCXXRecordDecl()); }; CollectMostBases(RD); return MostBases.takeVector(); } void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F) { llvm::AttrBuilder B; if (CodeGenOpts.UnwindTables) B.addAttribute(llvm::Attribute::UWTable); if (CodeGenOpts.StackClashProtector) B.addAttribute("probe-stack", "inline-asm"); if (!hasUnwindExceptions(LangOpts)) B.addAttribute(llvm::Attribute::NoUnwind); if (!D || !D->hasAttr()) { if (LangOpts.getStackProtector() == LangOptions::SSPOn) B.addAttribute(llvm::Attribute::StackProtect); else if (LangOpts.getStackProtector() == LangOptions::SSPStrong) B.addAttribute(llvm::Attribute::StackProtectStrong); else if (LangOpts.getStackProtector() == LangOptions::SSPReq) B.addAttribute(llvm::Attribute::StackProtectReq); } if (!D) { // If we don't have a declaration to control inlining, the function isn't // explicitly marked as alwaysinline for semantic reasons, and inlining is // disabled, mark the function as noinline. if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) && CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining) B.addAttribute(llvm::Attribute::NoInline); F->addAttributes(llvm::AttributeList::FunctionIndex, B); return; } // Track whether we need to add the optnone LLVM attribute, // starting with the default for this optimization level. bool ShouldAddOptNone = !CodeGenOpts.DisableO0ImplyOptNone && CodeGenOpts.OptimizationLevel == 0; // We can't add optnone in the following cases, it won't pass the verifier. ShouldAddOptNone &= !D->hasAttr(); ShouldAddOptNone &= !D->hasAttr(); // Add optnone, but do so only if the function isn't always_inline. if ((ShouldAddOptNone || D->hasAttr()) && !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) { B.addAttribute(llvm::Attribute::OptimizeNone); // OptimizeNone implies noinline; we should not be inlining such functions. B.addAttribute(llvm::Attribute::NoInline); // We still need to handle naked functions even though optnone subsumes // much of their semantics. if (D->hasAttr()) B.addAttribute(llvm::Attribute::Naked); // OptimizeNone wins over OptimizeForSize and MinSize. F->removeFnAttr(llvm::Attribute::OptimizeForSize); F->removeFnAttr(llvm::Attribute::MinSize); } else if (D->hasAttr()) { // Naked implies noinline: we should not be inlining such functions. B.addAttribute(llvm::Attribute::Naked); B.addAttribute(llvm::Attribute::NoInline); } else if (D->hasAttr()) { B.addAttribute(llvm::Attribute::NoDuplicate); } else if (D->hasAttr() && !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) { // Add noinline if the function isn't always_inline. B.addAttribute(llvm::Attribute::NoInline); } else if (D->hasAttr() && !F->hasFnAttribute(llvm::Attribute::NoInline)) { // (noinline wins over always_inline, and we can't specify both in IR) B.addAttribute(llvm::Attribute::AlwaysInline); } else if (CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining) { // If we're not inlining, then force everything that isn't always_inline to // carry an explicit noinline attribute. 
if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline)) B.addAttribute(llvm::Attribute::NoInline); } else { // Otherwise, propagate the inline hint attribute and potentially use its // absence to mark things as noinline. if (auto *FD = dyn_cast(D)) { // Search function and template pattern redeclarations for inline. auto CheckForInline = [](const FunctionDecl *FD) { auto CheckRedeclForInline = [](const FunctionDecl *Redecl) { return Redecl->isInlineSpecified(); }; if (any_of(FD->redecls(), CheckRedeclForInline)) return true; const FunctionDecl *Pattern = FD->getTemplateInstantiationPattern(); if (!Pattern) return false; return any_of(Pattern->redecls(), CheckRedeclForInline); }; if (CheckForInline(FD)) { B.addAttribute(llvm::Attribute::InlineHint); } else if (CodeGenOpts.getInlining() == CodeGenOptions::OnlyHintInlining && !FD->isInlined() && !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) { B.addAttribute(llvm::Attribute::NoInline); } } } // Add other optimization related attributes if we are optimizing this // function. if (!D->hasAttr()) { if (D->hasAttr()) { if (!ShouldAddOptNone) B.addAttribute(llvm::Attribute::OptimizeForSize); B.addAttribute(llvm::Attribute::Cold); } if (D->hasAttr()) B.addAttribute(llvm::Attribute::MinSize); } F->addAttributes(llvm::AttributeList::FunctionIndex, B); unsigned alignment = D->getMaxAlignment() / Context.getCharWidth(); if (alignment) F->setAlignment(llvm::Align(alignment)); if (!D->hasAttr()) if (LangOpts.FunctionAlignment) F->setAlignment(llvm::Align(1ull << LangOpts.FunctionAlignment)); // Some C++ ABIs require 2-byte alignment for member functions, in order to // reserve a bit for differentiating between virtual and non-virtual member // functions. If the current target's C++ ABI requires this and this is a // member function, set its alignment accordingly. if (getTarget().getCXXABI().areMemberFunctionsAligned()) { if (F->getAlignment() < 2 && isa(D)) F->setAlignment(llvm::Align(2)); } // In the cross-dso CFI mode with canonical jump tables, we want !type // attributes on definitions only. if (CodeGenOpts.SanitizeCfiCrossDso && CodeGenOpts.SanitizeCfiCanonicalJumpTables) { if (auto *FD = dyn_cast(D)) { // Skip available_externally functions. They won't be codegen'ed in the // current module anyway. if (getContext().GetGVALinkageForFunction(FD) != GVA_AvailableExternally) CreateFunctionTypeMetadataForIcall(FD, F); } } // Emit type metadata on member functions for member function pointer checks. // These are only ever necessary on definitions; we're guaranteed that the // definition will be present in the LTO unit as a result of LTO visibility. 
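  // (Concretely, the code below attaches !type metadata keyed on the
  // member-pointer type for each most-base class of the method's parent,
  // which is what the CFI member-function-pointer check compares against at
  // indirect call sites.)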
auto *MD = dyn_cast(D); if (MD && requiresMemberFunctionPointerTypeMetadata(*this, MD)) { for (const CXXRecordDecl *Base : getMostBaseClasses(MD->getParent())) { llvm::Metadata *Id = CreateMetadataIdentifierForType(Context.getMemberPointerType( MD->getType(), Context.getRecordType(Base).getTypePtr())); F->addTypeMetadata(0, Id); } } } void CodeGenModule::SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV) { const Decl *D = GD.getDecl(); if (dyn_cast_or_null(D)) setGVProperties(GV, GD); else GV->setVisibility(llvm::GlobalValue::DefaultVisibility); if (D && D->hasAttr()) addUsedGlobal(GV); if (CodeGenOpts.KeepStaticConsts && D && isa(D)) { const auto *VD = cast(D); if (VD->getType().isConstQualified() && VD->getStorageDuration() == SD_Static) addUsedGlobal(GV); } } bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, llvm::AttrBuilder &Attrs) { // Add target-cpu and target-features attributes to functions. If // we have a decl for the function and it has a target attribute then // parse that and add it to the feature set. StringRef TargetCPU = getTarget().getTargetOpts().CPU; StringRef TuneCPU = getTarget().getTargetOpts().TuneCPU; std::vector Features; const auto *FD = dyn_cast_or_null(GD.getDecl()); FD = FD ? FD->getMostRecentDecl() : FD; const auto *TD = FD ? FD->getAttr() : nullptr; const auto *SD = FD ? FD->getAttr() : nullptr; bool AddedAttr = false; if (TD || SD) { llvm::StringMap FeatureMap; getContext().getFunctionFeatureMap(FeatureMap, GD); // Produce the canonical string for this set of features. for (const llvm::StringMap::value_type &Entry : FeatureMap) Features.push_back((Entry.getValue() ? "+" : "-") + Entry.getKey().str()); // Now add the target-cpu and target-features to the function. // While we populated the feature map above, we still need to // get and parse the target attribute so we can get the cpu for // the function. if (TD) { ParsedTargetAttr ParsedAttr = TD->parse(); if (!ParsedAttr.Architecture.empty() && getTarget().isValidCPUName(ParsedAttr.Architecture)) { TargetCPU = ParsedAttr.Architecture; TuneCPU = ""; // Clear the tune CPU. } if (!ParsedAttr.Tune.empty() && getTarget().isValidCPUName(ParsedAttr.Tune)) TuneCPU = ParsedAttr.Tune; } } else { // Otherwise just add the existing target cpu and target features to the // function. 
Features = getTarget().getTargetOpts().Features; } if (!TargetCPU.empty()) { Attrs.addAttribute("target-cpu", TargetCPU); AddedAttr = true; } if (!TuneCPU.empty()) { Attrs.addAttribute("tune-cpu", TuneCPU); AddedAttr = true; } if (!Features.empty()) { llvm::sort(Features); Attrs.addAttribute("target-features", llvm::join(Features, ",")); AddedAttr = true; } return AddedAttr; } void CodeGenModule::setNonAliasAttributes(GlobalDecl GD, llvm::GlobalObject *GO) { const Decl *D = GD.getDecl(); SetCommonAttributes(GD, GO); if (D) { if (auto *GV = dyn_cast(GO)) { if (auto *SA = D->getAttr()) GV->addAttribute("bss-section", SA->getName()); if (auto *SA = D->getAttr()) GV->addAttribute("data-section", SA->getName()); if (auto *SA = D->getAttr()) GV->addAttribute("rodata-section", SA->getName()); if (auto *SA = D->getAttr()) GV->addAttribute("relro-section", SA->getName()); } if (auto *F = dyn_cast(GO)) { if (auto *SA = D->getAttr()) if (!D->getAttr()) F->addFnAttr("implicit-section-name", SA->getName()); llvm::AttrBuilder Attrs; if (GetCPUAndFeaturesAttributes(GD, Attrs)) { // We know that GetCPUAndFeaturesAttributes will always have the // newest set, since it has the newest possible FunctionDecl, so the // new ones should replace the old. llvm::AttrBuilder RemoveAttrs; RemoveAttrs.addAttribute("target-cpu"); RemoveAttrs.addAttribute("target-features"); RemoveAttrs.addAttribute("tune-cpu"); F->removeAttributes(llvm::AttributeList::FunctionIndex, RemoveAttrs); F->addAttributes(llvm::AttributeList::FunctionIndex, Attrs); } } if (const auto *CSA = D->getAttr()) GO->setSection(CSA->getName()); else if (const auto *SA = D->getAttr()) GO->setSection(SA->getName()); } getTargetCodeGenInfo().setTargetAttributes(D, GO, *this); } void CodeGenModule::SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI) { const Decl *D = GD.getDecl(); SetLLVMFunctionAttributes(GD, FI, F); SetLLVMFunctionAttributesForDefinition(D, F); F->setLinkage(llvm::Function::InternalLinkage); setNonAliasAttributes(GD, F); } static void setLinkageForGV(llvm::GlobalValue *GV, const NamedDecl *ND) { // Set linkage and visibility in case we never see a definition. LinkageInfo LV = ND->getLinkageAndVisibility(); // Don't set internal linkage on declarations. // "extern_weak" is overloaded in LLVM; we probably should have // separate linkage types for this. if (isExternallyVisible(LV.getLinkage()) && (ND->hasAttr() || ND->isWeakImported())) GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); } void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD, llvm::Function *F) { // Only if we are checking indirect calls. if (!LangOpts.Sanitize.has(SanitizerKind::CFIICall)) return; // Non-static class methods are handled via vtable or member function pointer // checks elsewhere. if (isa(FD) && !cast(FD)->isStatic()) return; llvm::Metadata *MD = CreateMetadataIdentifierForType(FD->getType()); F->addTypeMetadata(0, MD); F->addTypeMetadata(0, CreateMetadataIdentifierGeneralized(FD->getType())); // Emit a hash-based bit set entry for cross-DSO calls. if (CodeGenOpts.SanitizeCfiCrossDso) if (auto CrossDsoTypeId = CreateCrossDsoCfiTypeId(MD)) F->addTypeMetadata(0, llvm::ConstantAsMetadata::get(CrossDsoTypeId)); } void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, bool IsIncompleteFunction, bool IsThunk) { if (llvm::Intrinsic::ID IID = F->getIntrinsicID()) { // If this is an intrinsic function, set the function's attributes // to the intrinsic's attributes. 
F->setAttributes(llvm::Intrinsic::getAttributes(getLLVMContext(), IID)); return; } const auto *FD = cast(GD.getDecl()); if (!IsIncompleteFunction) SetLLVMFunctionAttributes(GD, getTypes().arrangeGlobalDeclaration(GD), F); // Add the Returned attribute for "this", except for iOS 5 and earlier // where substantial code, including the libstdc++ dylib, was compiled with // GCC and does not actually return "this". if (!IsThunk && getCXXABI().HasThisReturn(GD) && !(getTriple().isiOS() && getTriple().isOSVersionLT(6))) { assert(!F->arg_empty() && F->arg_begin()->getType() ->canLosslesslyBitCastTo(F->getReturnType()) && "unexpected this return"); F->addAttribute(1, llvm::Attribute::Returned); } // Only a few attributes are set on declarations; these may later be // overridden by a definition. setLinkageForGV(F, FD); setGVProperties(F, FD); // Setup target-specific attributes. if (!IsIncompleteFunction && F->isDeclaration()) getTargetCodeGenInfo().setTargetAttributes(FD, F, *this); if (const auto *CSA = FD->getAttr()) F->setSection(CSA->getName()); else if (const auto *SA = FD->getAttr()) F->setSection(SA->getName()); // If we plan on emitting this inline builtin, we can't treat it as a builtin. if (FD->isInlineBuiltinDeclaration()) { const FunctionDecl *FDBody; bool HasBody = FD->hasBody(FDBody); (void)HasBody; assert(HasBody && "Inline builtin declarations should always have an " "available body!"); if (shouldEmitFunction(FDBody)) F->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::NoBuiltin); } if (FD->isReplaceableGlobalAllocationFunction()) { // A replaceable global allocation function does not act like a builtin by // default, only if it is invoked by a new-expression or delete-expression. F->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::NoBuiltin); } if (isa(FD) || isa(FD)) F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); else if (const auto *MD = dyn_cast(FD)) if (MD->isVirtual()) F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // Don't emit entries for function declarations in the cross-DSO mode. This // is handled with better precision by the receiving DSO. But if jump tables // are non-canonical then we need type metadata in order to produce the local // jump table. if (!CodeGenOpts.SanitizeCfiCrossDso || !CodeGenOpts.SanitizeCfiCanonicalJumpTables) CreateFunctionTypeMetadataForIcall(FD, F); if (getLangOpts().OpenMP && FD->hasAttr()) getOpenMPRuntime().emitDeclareSimdFunction(FD, F); if (const auto *CB = FD->getAttr()) { // Annotate the callback behavior as metadata: // - The callback callee (as argument number). // - The callback payloads (as argument numbers). llvm::LLVMContext &Ctx = F->getContext(); llvm::MDBuilder MDB(Ctx); // The payload indices are all but the first one in the encoding. The first // identifies the callback callee. 
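    // (Illustrative: for a pthread_create-style broker declared with
    // __attribute__((callback(start_routine, arg))), the emitted !callback
    // metadata is roughly !{i64 2, i64 3, i1 false} -- argument 2 is the
    // callee and argument 3 is forwarded to it.)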
int CalleeIdx = *CB->encoding_begin(); ArrayRef PayloadIndices(CB->encoding_begin() + 1, CB->encoding_end()); F->addMetadata(llvm::LLVMContext::MD_callback, *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( CalleeIdx, PayloadIndices, /* VarArgsArePassed */ false)})); } } void CodeGenModule::addUsedGlobal(llvm::GlobalValue *GV) { assert((isa(GV) || !GV->isDeclaration()) && "Only globals with definition can force usage."); LLVMUsed.emplace_back(GV); } void CodeGenModule::addCompilerUsedGlobal(llvm::GlobalValue *GV) { assert(!GV->isDeclaration() && "Only globals with definition can force usage."); LLVMCompilerUsed.emplace_back(GV); } static void emitUsed(CodeGenModule &CGM, StringRef Name, std::vector &List) { // Don't create llvm.used if there is no need. if (List.empty()) return; // Convert List to what ConstantArray needs. SmallVector UsedArray; UsedArray.resize(List.size()); for (unsigned i = 0, e = List.size(); i != e; ++i) { UsedArray[i] = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( cast(&*List[i]), CGM.Int8PtrTy); } if (UsedArray.empty()) return; llvm::ArrayType *ATy = llvm::ArrayType::get(CGM.Int8PtrTy, UsedArray.size()); auto *GV = new llvm::GlobalVariable( CGM.getModule(), ATy, false, llvm::GlobalValue::AppendingLinkage, llvm::ConstantArray::get(ATy, UsedArray), Name); GV->setSection("llvm.metadata"); } void CodeGenModule::emitLLVMUsed() { emitUsed(*this, "llvm.used", LLVMUsed); emitUsed(*this, "llvm.compiler.used", LLVMCompilerUsed); } void CodeGenModule::AppendLinkerOptions(StringRef Opts) { auto *MDOpts = llvm::MDString::get(getLLVMContext(), Opts); LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts)); } void CodeGenModule::AddDetectMismatch(StringRef Name, StringRef Value) { llvm::SmallString<32> Opt; getTargetCodeGenInfo().getDetectMismatchOption(Name, Value, Opt); if (Opt.empty()) return; auto *MDOpts = llvm::MDString::get(getLLVMContext(), Opt); LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts)); } void CodeGenModule::AddDependentLib(StringRef Lib) { auto &C = getLLVMContext(); if (getTarget().getTriple().isOSBinFormatELF()) { ELFDependentLibraries.push_back( llvm::MDNode::get(C, llvm::MDString::get(C, Lib))); return; } llvm::SmallString<24> Opt; getTargetCodeGenInfo().getDependentLibraryOption(Lib, Opt); auto *MDOpts = llvm::MDString::get(getLLVMContext(), Opt); LinkerOptionsMetadata.push_back(llvm::MDNode::get(C, MDOpts)); } /// Add link options implied by the given module, including modules /// it depends on, using a postorder walk. static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, SmallVectorImpl &Metadata, llvm::SmallPtrSet &Visited) { // Import this module's parent. if (Mod->Parent && Visited.insert(Mod->Parent).second) { addLinkOptionsPostorder(CGM, Mod->Parent, Metadata, Visited); } // Import this module's dependencies. for (unsigned I = Mod->Imports.size(); I > 0; --I) { if (Visited.insert(Mod->Imports[I - 1]).second) addLinkOptionsPostorder(CGM, Mod->Imports[I-1], Metadata, Visited); } // Add linker options to link against the libraries/frameworks // described by this module. llvm::LLVMContext &Context = CGM.getLLVMContext(); bool IsELF = CGM.getTarget().getTriple().isOSBinFormatELF(); // For modules that use export_as for linking, use that module // name instead. if (Mod->UseExportAsModuleLinkName) return; for (unsigned I = Mod->LinkLibraries.size(); I > 0; --I) { // Link against a framework. 
Frameworks are currently Darwin only, so we // don't to ask TargetCodeGenInfo for the spelling of the linker option. if (Mod->LinkLibraries[I-1].IsFramework) { llvm::Metadata *Args[2] = { llvm::MDString::get(Context, "-framework"), llvm::MDString::get(Context, Mod->LinkLibraries[I - 1].Library)}; Metadata.push_back(llvm::MDNode::get(Context, Args)); continue; } // Link against a library. if (IsELF) { llvm::Metadata *Args[2] = { llvm::MDString::get(Context, "lib"), llvm::MDString::get(Context, Mod->LinkLibraries[I - 1].Library), }; Metadata.push_back(llvm::MDNode::get(Context, Args)); } else { llvm::SmallString<24> Opt; CGM.getTargetCodeGenInfo().getDependentLibraryOption( Mod->LinkLibraries[I - 1].Library, Opt); auto *OptString = llvm::MDString::get(Context, Opt); Metadata.push_back(llvm::MDNode::get(Context, OptString)); } } } void CodeGenModule::EmitModuleLinkOptions() { // Collect the set of all of the modules we want to visit to emit link // options, which is essentially the imported modules and all of their // non-explicit child modules. llvm::SetVector LinkModules; llvm::SmallPtrSet Visited; SmallVector Stack; // Seed the stack with imported modules. for (Module *M : ImportedModules) { // Do not add any link flags when an implementation TU of a module imports // a header of that same module. if (M->getTopLevelModuleName() == getLangOpts().CurrentModule && !getLangOpts().isCompilingModule()) continue; if (Visited.insert(M).second) Stack.push_back(M); } // Find all of the modules to import, making a little effort to prune // non-leaf modules. while (!Stack.empty()) { clang::Module *Mod = Stack.pop_back_val(); bool AnyChildren = false; // Visit the submodules of this module. for (const auto &SM : Mod->submodules()) { // Skip explicit children; they need to be explicitly imported to be // linked against. if (SM->IsExplicit) continue; if (Visited.insert(SM).second) { Stack.push_back(SM); AnyChildren = true; } } // We didn't find any children, so add this module to the list of // modules to link against. if (!AnyChildren) { LinkModules.insert(Mod); } } // Add link options for all of the imported modules in reverse topological // order. We don't do anything to try to order import link flags with respect // to linker options inserted by things like #pragma comment(). SmallVector MetadataArgs; Visited.clear(); for (Module *M : LinkModules) if (Visited.insert(M).second) addLinkOptionsPostorder(*this, M, MetadataArgs, Visited); std::reverse(MetadataArgs.begin(), MetadataArgs.end()); LinkerOptionsMetadata.append(MetadataArgs.begin(), MetadataArgs.end()); // Add the linker options metadata flag. auto *NMD = getModule().getOrInsertNamedMetadata("llvm.linker.options"); for (auto *MD : LinkerOptionsMetadata) NMD->addOperand(MD); } void CodeGenModule::EmitDeferred() { // Emit deferred declare target declarations. if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd) getOpenMPRuntime().emitDeferredTargetDecls(); // Emit code for any potentially referenced deferred decls. Since a // previously unused static decl may become used during the generation of code // for a static function, iterate until no changes are made. if (!DeferredVTables.empty()) { EmitDeferredVTables(); // Emitting a vtable doesn't directly cause more vtables to // become deferred, although it can cause functions to be // emitted that then need those vtables. assert(DeferredVTables.empty()); } // Emit CUDA/HIP static device variables referenced by host code only. 
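  // (These have internal linkage, so they are added here explicitly rather
  // than being picked up by the usual name-based deferred-decl lookup;
  // illustrative note.)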
if (getLangOpts().CUDA) for (auto V : getContext().CUDAStaticDeviceVarReferencedByHost) DeferredDeclsToEmit.push_back(V); // Stop if we're out of both deferred vtables and deferred declarations. if (DeferredDeclsToEmit.empty()) return; // Grab the list of decls to emit. If EmitGlobalDefinition schedules more // work, it will not interfere with this. std::vector CurDeclsToEmit; CurDeclsToEmit.swap(DeferredDeclsToEmit); for (GlobalDecl &D : CurDeclsToEmit) { // We should call GetAddrOfGlobal with IsForDefinition set to true in order // to get GlobalValue with exactly the type we need, not something that // might had been created for another decl with the same mangled name but // different type. llvm::GlobalValue *GV = dyn_cast( GetAddrOfGlobal(D, ForDefinition)); // In case of different address spaces, we may still get a cast, even with // IsForDefinition equal to true. Query mangled names table to get // GlobalValue. if (!GV) GV = GetGlobalValue(getMangledName(D)); // Make sure GetGlobalValue returned non-null. assert(GV); // Check to see if we've already emitted this. This is necessary // for a couple of reasons: first, decls can end up in the // deferred-decls queue multiple times, and second, decls can end // up with definitions in unusual ways (e.g. by an extern inline // function acquiring a strong function redefinition). Just // ignore these cases. if (!GV->isDeclaration()) continue; // If this is OpenMP, check if it is legal to emit this global normally. if (LangOpts.OpenMP && OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(D)) continue; // Otherwise, emit the definition and move on to the next one. EmitGlobalDefinition(D, GV); // If we found out that we need to emit more decls, do that recursively. // This has the advantage that the decls are emitted in a DFS and related // ones are close together, which is convenient for testing. if (!DeferredVTables.empty() || !DeferredDeclsToEmit.empty()) { EmitDeferred(); assert(DeferredVTables.empty() && DeferredDeclsToEmit.empty()); } } } void CodeGenModule::EmitVTablesOpportunistically() { // Try to emit external vtables as available_externally if they have emitted // all inlined virtual functions. It runs after EmitDeferred() and therefore // is not allowed to create new references to things that need to be emitted // lazily. Note that it also uses fact that we eagerly emitting RTTI. assert((OpportunisticVTables.empty() || shouldOpportunisticallyEmitVTables()) && "Only emit opportunistic vtables with optimizations"); for (const CXXRecordDecl *RD : OpportunisticVTables) { assert(getVTables().isVTableExternal(RD) && "This queue should only contain external vtables"); if (getCXXABI().canSpeculativelyEmitVTable(RD)) VTables.GenerateClassData(RD); } OpportunisticVTables.clear(); } void CodeGenModule::EmitGlobalAnnotations() { if (Annotations.empty()) return; // Create a new global variable for the ConstantStruct in the Module. llvm::Constant *Array = llvm::ConstantArray::get(llvm::ArrayType::get( Annotations[0]->getType(), Annotations.size()), Annotations); auto *gv = new llvm::GlobalVariable(getModule(), Array->getType(), false, llvm::GlobalValue::AppendingLinkage, Array, "llvm.global.annotations"); gv->setSection(AnnotationSection); } llvm::Constant *CodeGenModule::EmitAnnotationString(StringRef Str) { llvm::Constant *&AStr = AnnotationStrings[Str]; if (AStr) return AStr; // Not found yet, create a new global. 
llvm::Constant *s = llvm::ConstantDataArray::getString(getLLVMContext(), Str); auto *gv = new llvm::GlobalVariable(getModule(), s->getType(), true, llvm::GlobalValue::PrivateLinkage, s, ".str"); gv->setSection(AnnotationSection); gv->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); AStr = gv; return gv; } llvm::Constant *CodeGenModule::EmitAnnotationUnit(SourceLocation Loc) { SourceManager &SM = getContext().getSourceManager(); PresumedLoc PLoc = SM.getPresumedLoc(Loc); if (PLoc.isValid()) return EmitAnnotationString(PLoc.getFilename()); return EmitAnnotationString(SM.getBufferName(Loc)); } llvm::Constant *CodeGenModule::EmitAnnotationLineNo(SourceLocation L) { SourceManager &SM = getContext().getSourceManager(); PresumedLoc PLoc = SM.getPresumedLoc(L); unsigned LineNo = PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L); return llvm::ConstantInt::get(Int32Ty, LineNo); } llvm::Constant *CodeGenModule::EmitAnnotateAttr(llvm::GlobalValue *GV, const AnnotateAttr *AA, SourceLocation L) { // Get the globals for file name, annotation, and the line number. llvm::Constant *AnnoGV = EmitAnnotationString(AA->getAnnotation()), *UnitGV = EmitAnnotationUnit(L), *LineNoCst = EmitAnnotationLineNo(L); llvm::Constant *ASZeroGV = GV; if (GV->getAddressSpace() != 0) { ASZeroGV = llvm::ConstantExpr::getAddrSpaceCast( GV, GV->getValueType()->getPointerTo(0)); } // Create the ConstantStruct for the global annotation. llvm::Constant *Fields[4] = { llvm::ConstantExpr::getBitCast(ASZeroGV, Int8PtrTy), llvm::ConstantExpr::getBitCast(AnnoGV, Int8PtrTy), llvm::ConstantExpr::getBitCast(UnitGV, Int8PtrTy), LineNoCst }; return llvm::ConstantStruct::getAnon(Fields); } void CodeGenModule::AddGlobalAnnotations(const ValueDecl *D, llvm::GlobalValue *GV) { assert(D->hasAttr() && "no annotate attribute"); // Get the struct elements for these annotations. for (const auto *I : D->specific_attrs()) Annotations.push_back(EmitAnnotateAttr(GV, I, D->getLocation())); } bool CodeGenModule::isInSanitizerBlacklist(SanitizerMask Kind, llvm::Function *Fn, SourceLocation Loc) const { const auto &SanitizerBL = getContext().getSanitizerBlacklist(); // Blacklist by function name. if (SanitizerBL.isBlacklistedFunction(Kind, Fn->getName())) return true; // Blacklist by location. if (Loc.isValid()) return SanitizerBL.isBlacklistedLocation(Kind, Loc); // If location is unknown, this may be a compiler-generated function. Assume // it's located in the main file. auto &SM = Context.getSourceManager(); if (const auto *MainFile = SM.getFileEntryForID(SM.getMainFileID())) { return SanitizerBL.isBlacklistedFile(Kind, MainFile->getName()); } return false; } bool CodeGenModule::isInSanitizerBlacklist(llvm::GlobalVariable *GV, SourceLocation Loc, QualType Ty, StringRef Category) const { // For now globals can be blacklisted only in ASan and KASan. const SanitizerMask EnabledAsanMask = LangOpts.Sanitize.Mask & (SanitizerKind::Address | SanitizerKind::KernelAddress | SanitizerKind::HWAddress | SanitizerKind::KernelHWAddress | SanitizerKind::MemTag); if (!EnabledAsanMask) return false; const auto &SanitizerBL = getContext().getSanitizerBlacklist(); if (SanitizerBL.isBlacklistedGlobal(EnabledAsanMask, GV->getName(), Category)) return true; if (SanitizerBL.isBlacklistedLocation(EnabledAsanMask, Loc, Category)) return true; // Check global type. if (!Ty.isNull()) { // Drill down the array types: if global variable of a fixed type is // blacklisted, we also don't instrument arrays of them. 
while (auto AT = dyn_cast(Ty.getTypePtr())) Ty = AT->getElementType(); Ty = Ty.getCanonicalType().getUnqualifiedType(); // We allow to blacklist only record types (classes, structs etc.) if (Ty->isRecordType()) { std::string TypeStr = Ty.getAsString(getContext().getPrintingPolicy()); if (SanitizerBL.isBlacklistedType(EnabledAsanMask, TypeStr, Category)) return true; } } return false; } bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc, StringRef Category) const { const auto &XRayFilter = getContext().getXRayFilter(); using ImbueAttr = XRayFunctionFilter::ImbueAttribute; auto Attr = ImbueAttr::NONE; if (Loc.isValid()) Attr = XRayFilter.shouldImbueLocation(Loc, Category); if (Attr == ImbueAttr::NONE) Attr = XRayFilter.shouldImbueFunction(Fn->getName()); switch (Attr) { case ImbueAttr::NONE: return false; case ImbueAttr::ALWAYS: Fn->addFnAttr("function-instrument", "xray-always"); break; case ImbueAttr::ALWAYS_ARG1: Fn->addFnAttr("function-instrument", "xray-always"); Fn->addFnAttr("xray-log-args", "1"); break; case ImbueAttr::NEVER: Fn->addFnAttr("function-instrument", "xray-never"); break; } return true; } bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) { // Never defer when EmitAllDecls is specified. if (LangOpts.EmitAllDecls) return true; if (CodeGenOpts.KeepStaticConsts) { const auto *VD = dyn_cast(Global); if (VD && VD->getType().isConstQualified() && VD->getStorageDuration() == SD_Static) return true; } return getContext().DeclMustBeEmitted(Global); } bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { if (const auto *FD = dyn_cast(Global)) { if (FD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) // Implicit template instantiations may change linkage if they are later // explicitly instantiated, so they should not be emitted eagerly. return false; // In OpenMP 5.0 function may be marked as device_type(nohost) and we should // not emit them eagerly unless we sure that the function must be emitted on // the host. if (LangOpts.OpenMP >= 50 && !LangOpts.OpenMPSimd && !LangOpts.OpenMPIsDevice && !OMPDeclareTargetDeclAttr::getDeviceType(FD) && !FD->isUsed(/*CheckUsedAttr=*/false) && !FD->isReferenced()) return false; } if (const auto *VD = dyn_cast(Global)) if (Context.getInlineVariableDefinitionKind(VD) == ASTContext::InlineVariableDefinitionKind::WeakUnknown) // A definition of an inline constexpr static data member may change // linkage later if it's redeclared outside the class. return false; // If OpenMP is enabled and threadprivates must be generated like TLS, delay // codegen for global variables, because they may be marked as threadprivate. if (LangOpts.OpenMP && LangOpts.OpenMPUseTLS && getContext().getTargetInfo().isTLSSupported() && isa(Global) && !isTypeConstant(Global->getType(), false) && !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(Global)) return false; return true; } ConstantAddress CodeGenModule::GetAddrOfMSGuidDecl(const MSGuidDecl *GD) { StringRef Name = getMangledName(GD); // The UUID descriptor should be pointer aligned. CharUnits Alignment = CharUnits::fromQuantity(PointerAlignInBytes); // Look for an existing global. if (llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name)) return ConstantAddress(GV, Alignment); ConstantEmitter Emitter(*this); llvm::Constant *Init; APValue &V = GD->getAsAPValue(); if (!V.isAbsent()) { // If possible, emit the APValue version of the initializer. In particular, // this gets the type of the constant right. 
Init = Emitter.emitForInitializer( GD->getAsAPValue(), GD->getType().getAddressSpace(), GD->getType()); } else { // As a fallback, directly construct the constant. // FIXME: This may get padding wrong under esoteric struct layout rules. // MSVC appears to create a complete type 'struct __s_GUID' that it // presumably uses to represent these constants. MSGuidDecl::Parts Parts = GD->getParts(); llvm::Constant *Fields[4] = { llvm::ConstantInt::get(Int32Ty, Parts.Part1), llvm::ConstantInt::get(Int16Ty, Parts.Part2), llvm::ConstantInt::get(Int16Ty, Parts.Part3), llvm::ConstantDataArray::getRaw( StringRef(reinterpret_cast(Parts.Part4And5), 8), 8, Int8Ty)}; Init = llvm::ConstantStruct::getAnon(Fields); } auto *GV = new llvm::GlobalVariable( getModule(), Init->getType(), /*isConstant=*/true, llvm::GlobalValue::LinkOnceODRLinkage, Init, Name); if (supportsCOMDAT()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); setDSOLocal(GV); llvm::Constant *Addr = GV; if (!V.isAbsent()) { Emitter.finalize(GV); } else { llvm::Type *Ty = getTypes().ConvertTypeForMem(GD->getType()); Addr = llvm::ConstantExpr::getBitCast( GV, Ty->getPointerTo(GV->getAddressSpace())); } return ConstantAddress(Addr, Alignment); } ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) { const AliasAttr *AA = VD->getAttr(); assert(AA && "No alias?"); CharUnits Alignment = getContext().getDeclAlign(VD); llvm::Type *DeclTy = getTypes().ConvertTypeForMem(VD->getType()); // See if there is already something with the target's name in the module. llvm::GlobalValue *Entry = GetGlobalValue(AA->getAliasee()); if (Entry) { unsigned AS = getContext().getTargetAddressSpace(VD->getType()); auto Ptr = llvm::ConstantExpr::getBitCast(Entry, DeclTy->getPointerTo(AS)); return ConstantAddress(Ptr, Alignment); } llvm::Constant *Aliasee; if (isa(DeclTy)) Aliasee = GetOrCreateLLVMFunction(AA->getAliasee(), DeclTy, GlobalDecl(cast(VD)), /*ForVTable=*/false); else Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), llvm::PointerType::getUnqual(DeclTy), nullptr); auto *F = cast(Aliasee); F->setLinkage(llvm::Function::ExternalWeakLinkage); WeakRefReferences.insert(F); return ConstantAddress(Aliasee, Alignment); } void CodeGenModule::EmitGlobal(GlobalDecl GD) { const auto *Global = cast(GD.getDecl()); // Weak references don't produce any output by themselves. if (Global->hasAttr()) return; // If this is an alias definition (which otherwise looks like a declaration) // emit it now. if (Global->hasAttr()) return EmitAliasDefinition(GD); // IFunc like an alias whose value is resolved at runtime by calling resolver. if (Global->hasAttr()) return emitIFuncDefinition(GD); // If this is a cpu_dispatch multiversion function, emit the resolver. if (Global->hasAttr()) return emitCPUDispatchDefinition(GD); // If this is CUDA, be selective about which declarations we emit. if (LangOpts.CUDA) { if (LangOpts.CUDAIsDevice) { if (!Global->hasAttr() && !Global->hasAttr() && !Global->hasAttr() && !Global->hasAttr() && !Global->getType()->isCUDADeviceBuiltinSurfaceType() && !Global->getType()->isCUDADeviceBuiltinTextureType()) return; } else { // We need to emit host-side 'shadows' for all global // device-side variables because the CUDA runtime needs their // size and host-side address in order to provide access to // their device-side incarnations. // So device-only functions are the only things we skip. 
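      // (i.e. a function marked __device__ but not __host__ is skipped in the
      // host compilation below; device variables still get host-side shadows.)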
if (isa(Global) && !Global->hasAttr() && Global->hasAttr()) return; assert((isa(Global) || isa(Global)) && "Expected Variable or Function"); } } if (LangOpts.OpenMP) { // If this is OpenMP, check if it is legal to emit this global normally. if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD)) return; if (auto *DRD = dyn_cast(Global)) { if (MustBeEmitted(Global)) EmitOMPDeclareReduction(DRD); return; } else if (auto *DMD = dyn_cast(Global)) { if (MustBeEmitted(Global)) EmitOMPDeclareMapper(DMD); return; } } // Ignore declarations, they will be emitted on their first use. if (const auto *FD = dyn_cast(Global)) { // Forward declarations are emitted lazily on first use. if (!FD->doesThisDeclarationHaveABody()) { if (!FD->doesDeclarationForceExternallyVisibleDefinition()) return; StringRef MangledName = getMangledName(GD); // Compute the function info and LLVM type. const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); llvm::Type *Ty = getTypes().GetFunctionType(FI); GetOrCreateLLVMFunction(MangledName, Ty, GD, /*ForVTable=*/false, /*DontDefer=*/false); return; } } else { const auto *VD = cast(Global); assert(VD->isFileVarDecl() && "Cannot emit local var decl as global."); if (VD->isThisDeclarationADefinition() != VarDecl::Definition && !Context.isMSStaticDataMemberInlineDefinition(VD)) { if (LangOpts.OpenMP) { // Emit declaration of the must-be-emitted declare target variable. if (llvm::Optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { bool UnifiedMemoryEnabled = getOpenMPRuntime().hasRequiresUnifiedSharedMemory(); if (*Res == OMPDeclareTargetDeclAttr::MT_To && !UnifiedMemoryEnabled) { (void)GetAddrOfGlobalVar(VD); } else { assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || (*Res == OMPDeclareTargetDeclAttr::MT_To && UnifiedMemoryEnabled)) && "Link clause or to clause with unified memory expected."); (void)getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); } return; } } // If this declaration may have caused an inline variable definition to // change linkage, make sure that it's emitted. if (Context.getInlineVariableDefinitionKind(VD) == ASTContext::InlineVariableDefinitionKind::Strong) GetAddrOfGlobalVar(VD); return; } } // Defer code generation to first use when possible, e.g. if this is an inline // function. If the global must always be emitted, do it eagerly if possible // to benefit from cache locality. if (MustBeEmitted(Global) && MayBeEmittedEagerly(Global)) { // Emit the definition if it can't be deferred. EmitGlobalDefinition(GD); return; } // If we're deferring emission of a C++ variable with an // initializer, remember the order in which it appeared in the file. if (getLangOpts().CPlusPlus && isa(Global) && cast(Global)->hasInit()) { DelayedCXXInitPosition[Global] = CXXGlobalInits.size(); CXXGlobalInits.push_back(nullptr); } StringRef MangledName = getMangledName(GD); if (GetGlobalValue(MangledName) != nullptr) { // The value has already been used and should therefore be emitted. addDeferredDeclToEmit(GD); } else if (MustBeEmitted(Global)) { // The value must be emitted, but cannot be emitted eagerly. assert(!MayBeEmittedEagerly(Global)); addDeferredDeclToEmit(GD); } else { // Otherwise, remember that we saw a deferred decl with this name. The // first use of the mangled name will cause it to move into // DeferredDeclsToEmit. DeferredDecls[MangledName] = GD; } } // Check if T is a class type with a destructor that's not dllimport. 
static bool HasNonDllImportDtor(QualType T) { if (const auto *RT = T->getBaseElementTypeUnsafe()->getAs()) if (CXXRecordDecl *RD = dyn_cast(RT->getDecl())) if (RD->getDestructor() && !RD->getDestructor()->hasAttr()) return true; return false; } namespace { struct FunctionIsDirectlyRecursive : public ConstStmtVisitor { const StringRef Name; const Builtin::Context &BI; FunctionIsDirectlyRecursive(StringRef N, const Builtin::Context &C) : Name(N), BI(C) {} bool VisitCallExpr(const CallExpr *E) { const FunctionDecl *FD = E->getDirectCallee(); if (!FD) return false; AsmLabelAttr *Attr = FD->getAttr(); if (Attr && Name == Attr->getLabel()) return true; unsigned BuiltinID = FD->getBuiltinID(); if (!BuiltinID || !BI.isLibFunction(BuiltinID)) return false; StringRef BuiltinName = BI.getName(BuiltinID); if (BuiltinName.startswith("__builtin_") && Name == BuiltinName.slice(strlen("__builtin_"), StringRef::npos)) { return true; } return false; } bool VisitStmt(const Stmt *S) { for (const Stmt *Child : S->children()) if (Child && this->Visit(Child)) return true; return false; } }; // Make sure we're not referencing non-imported vars or functions. struct DLLImportFunctionVisitor : public RecursiveASTVisitor { bool SafeToInline = true; bool shouldVisitImplicitCode() const { return true; } bool VisitVarDecl(VarDecl *VD) { if (VD->getTLSKind()) { // A thread-local variable cannot be imported. SafeToInline = false; return SafeToInline; } // A variable definition might imply a destructor call. if (VD->isThisDeclarationADefinition()) SafeToInline = !HasNonDllImportDtor(VD->getType()); return SafeToInline; } bool VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) { if (const auto *D = E->getTemporary()->getDestructor()) SafeToInline = D->hasAttr(); return SafeToInline; } bool VisitDeclRefExpr(DeclRefExpr *E) { ValueDecl *VD = E->getDecl(); if (isa(VD)) SafeToInline = VD->hasAttr(); else if (VarDecl *V = dyn_cast(VD)) SafeToInline = !V->hasGlobalStorage() || V->hasAttr(); return SafeToInline; } bool VisitCXXConstructExpr(CXXConstructExpr *E) { SafeToInline = E->getConstructor()->hasAttr(); return SafeToInline; } bool VisitCXXMemberCallExpr(CXXMemberCallExpr *E) { CXXMethodDecl *M = E->getMethodDecl(); if (!M) { // Call through a pointer to member function. This is safe to inline. SafeToInline = true; } else { SafeToInline = M->hasAttr(); } return SafeToInline; } bool VisitCXXDeleteExpr(CXXDeleteExpr *E) { SafeToInline = E->getOperatorDelete()->hasAttr(); return SafeToInline; } bool VisitCXXNewExpr(CXXNewExpr *E) { SafeToInline = E->getOperatorNew()->hasAttr(); return SafeToInline; } }; } // isTriviallyRecursive - Check if this function calls another // decl that, because of the asm attribute or the other decl being a builtin, // ends up pointing to itself. bool CodeGenModule::isTriviallyRecursive(const FunctionDecl *FD) { StringRef Name; if (getCXXABI().getMangleContext().shouldMangleDeclName(FD)) { // asm labels are a special kind of mangling we have to support. AsmLabelAttr *Attr = FD->getAttr(); if (!Attr) return false; Name = Attr->getLabel(); } else { Name = FD->getName(); } FunctionIsDirectlyRecursive Walker(Name, Context.BuiltinInfo); const Stmt *Body = FD->getBody(); return Body ? 
Walker.Visit(Body) : false; } bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) { if (getFunctionLinkage(GD) != llvm::Function::AvailableExternallyLinkage) return true; const auto *F = cast(GD.getDecl()); if (CodeGenOpts.OptimizationLevel == 0 && !F->hasAttr()) return false; if (F->hasAttr()) { // Check whether it would be safe to inline this dllimport function. DLLImportFunctionVisitor Visitor; Visitor.TraverseFunctionDecl(const_cast(F)); if (!Visitor.SafeToInline) return false; if (const CXXDestructorDecl *Dtor = dyn_cast(F)) { // Implicit destructor invocations aren't captured in the AST, so the // check above can't see them. Check for them manually here. for (const Decl *Member : Dtor->getParent()->decls()) if (isa(Member)) if (HasNonDllImportDtor(cast(Member)->getType())) return false; for (const CXXBaseSpecifier &B : Dtor->getParent()->bases()) if (HasNonDllImportDtor(B.getType())) return false; } } // PR9614. Avoid cases where the source code is lying to us. An available // externally function should have an equivalent function somewhere else, // but a function that calls itself through asm label/`__builtin_` trickery is // clearly not equivalent to the real implementation. // This happens in glibc's btowc and in some configure checks. return !isTriviallyRecursive(F); } bool CodeGenModule::shouldOpportunisticallyEmitVTables() { return CodeGenOpts.OptimizationLevel > 0; } void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { const auto *FD = cast(GD.getDecl()); if (FD->isCPUSpecificMultiVersion()) { auto *Spec = FD->getAttr(); for (unsigned I = 0; I < Spec->cpus_size(); ++I) EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr); // Requires multiple emits. } else EmitGlobalFunctionDefinition(GD, GV); } void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { const auto *D = cast(GD.getDecl()); PrettyStackTraceDecl CrashInfo(const_cast(D), D->getLocation(), Context.getSourceManager(), "Generating code for declaration"); if (const auto *FD = dyn_cast(D)) { // At -O0, don't generate IR for functions with available_externally // linkage. if (!shouldEmitFunction(GD)) return; llvm::TimeTraceScope TimeScope("CodeGen Function", [&]() { std::string Name; llvm::raw_string_ostream OS(Name); FD->getNameForDiagnostic(OS, getContext().getPrintingPolicy(), /*Qualified=*/true); return Name; }); if (const auto *Method = dyn_cast(D)) { // Make sure to emit the definition(s) before we emit the thunks. // This is necessary for the generation of certain thunks. 
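    // Illustrative sketch (editorial addition, not part of the original
    // patch; the types below are hypothetical):
    //   struct A { virtual void f(); };
    //   struct B { virtual void g(); };
    //   struct C : A, B { void g() override; };
    // C::g overrides a virtual from the non-primary base B, so a
    // this-adjusting thunk is emitted for it, which is why the definition is
    // emitted before EmitThunks below, as the comment above notes.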
if (isa(Method) || isa(Method)) ABI->emitCXXStructor(GD); else if (FD->isMultiVersion()) EmitMultiVersionFunctionDefinition(GD, GV); else EmitGlobalFunctionDefinition(GD, GV); if (Method->isVirtual()) getVTables().EmitThunks(GD); return; } if (FD->isMultiVersion()) return EmitMultiVersionFunctionDefinition(GD, GV); return EmitGlobalFunctionDefinition(GD, GV); } if (const auto *VD = dyn_cast(D)) return EmitGlobalVarDefinition(VD, !VD->hasDefinition()); llvm_unreachable("Invalid argument to EmitGlobalDefinition()"); } static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, llvm::Function *NewFn); static unsigned TargetMVPriority(const TargetInfo &TI, const CodeGenFunction::MultiVersionResolverOption &RO) { unsigned Priority = 0; for (StringRef Feat : RO.Conditions.Features) Priority = std::max(Priority, TI.multiVersionSortPriority(Feat)); if (!RO.Conditions.Architecture.empty()) Priority = std::max( Priority, TI.multiVersionSortPriority(RO.Conditions.Architecture)); return Priority; } void CodeGenModule::emitMultiVersionFunctions() { for (GlobalDecl GD : MultiVersionFuncs) { SmallVector Options; const FunctionDecl *FD = cast(GD.getDecl()); getContext().forEachMultiversionedFunctionVersion( FD, [this, &GD, &Options](const FunctionDecl *CurFD) { GlobalDecl CurGD{ (CurFD->isDefined() ? CurFD->getDefinition() : CurFD)}; StringRef MangledName = getMangledName(CurGD); llvm::Constant *Func = GetGlobalValue(MangledName); if (!Func) { if (CurFD->isDefined()) { EmitGlobalFunctionDefinition(CurGD, nullptr); Func = GetGlobalValue(MangledName); } else { const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); Func = GetAddrOfFunction(CurGD, Ty, /*ForVTable=*/false, /*DontDefer=*/false, ForDefinition); } assert(Func && "This should have just been created"); } const auto *TA = CurFD->getAttr(); llvm::SmallVector Feats; TA->getAddedFeatures(Feats); Options.emplace_back(cast(Func), TA->getArchitecture(), Feats); }); llvm::Function *ResolverFunc; const TargetInfo &TI = getTarget(); if (TI.supportsIFunc() || FD->isTargetMultiVersion()) { ResolverFunc = cast( GetGlobalValue((getMangledName(GD) + ".resolver").str())); ResolverFunc->setLinkage(llvm::Function::WeakODRLinkage); } else { ResolverFunc = cast(GetGlobalValue(getMangledName(GD))); } if (supportsCOMDAT()) ResolverFunc->setComdat( getModule().getOrInsertComdat(ResolverFunc->getName())); llvm::stable_sort( Options, [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS, const CodeGenFunction::MultiVersionResolverOption &RHS) { return TargetMVPriority(TI, LHS) > TargetMVPriority(TI, RHS); }); CodeGenFunction CGF(*this); CGF.EmitMultiVersionResolver(ResolverFunc, Options); } } void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { const auto *FD = cast(GD.getDecl()); assert(FD && "Not a FunctionDecl?"); const auto *DD = FD->getAttr(); assert(DD && "Not a cpu_dispatch Function?"); llvm::Type *DeclTy = getTypes().ConvertType(FD->getType()); if (const auto *CXXFD = dyn_cast(FD)) { const CGFunctionInfo &FInfo = getTypes().arrangeCXXMethodDeclaration(CXXFD); DeclTy = getTypes().GetFunctionType(FInfo); } StringRef ResolverName = getMangledName(GD); llvm::Type *ResolverType; GlobalDecl ResolverGD; if (getTarget().supportsIFunc()) ResolverType = llvm::FunctionType::get( llvm::PointerType::get(DeclTy, Context.getTargetAddressSpace(FD->getType())), false); else { ResolverType = DeclTy; ResolverGD = GD; } auto *ResolverFunc = cast(GetOrCreateLLVMFunction( 
ResolverName, ResolverType, ResolverGD, /*ForVTable=*/false)); ResolverFunc->setLinkage(llvm::Function::WeakODRLinkage); if (supportsCOMDAT()) ResolverFunc->setComdat( getModule().getOrInsertComdat(ResolverFunc->getName())); SmallVector Options; const TargetInfo &Target = getTarget(); unsigned Index = 0; for (const IdentifierInfo *II : DD->cpus()) { // Get the name of the target function so we can look it up/create it. std::string MangledName = getMangledNameImpl(*this, GD, FD, true) + getCPUSpecificMangling(*this, II->getName()); llvm::Constant *Func = GetGlobalValue(MangledName); if (!Func) { GlobalDecl ExistingDecl = Manglings.lookup(MangledName); if (ExistingDecl.getDecl() && ExistingDecl.getDecl()->getAsFunction()->isDefined()) { EmitGlobalFunctionDefinition(ExistingDecl, nullptr); Func = GetGlobalValue(MangledName); } else { if (!ExistingDecl.getDecl()) ExistingDecl = GD.getWithMultiVersionIndex(Index); Func = GetOrCreateLLVMFunction( MangledName, DeclTy, ExistingDecl, /*ForVTable=*/false, /*DontDefer=*/true, /*IsThunk=*/false, llvm::AttributeList(), ForDefinition); } } llvm::SmallVector Features; Target.getCPUSpecificCPUDispatchFeatures(II->getName(), Features); llvm::transform(Features, Features.begin(), [](StringRef Str) { return Str.substr(1); }); Features.erase(std::remove_if( Features.begin(), Features.end(), [&Target](StringRef Feat) { return !Target.validateCpuSupports(Feat); }), Features.end()); Options.emplace_back(cast(Func), StringRef{}, Features); ++Index; } llvm::sort( Options, [](const CodeGenFunction::MultiVersionResolverOption &LHS, const CodeGenFunction::MultiVersionResolverOption &RHS) { return CodeGenFunction::GetX86CpuSupportsMask(LHS.Conditions.Features) > CodeGenFunction::GetX86CpuSupportsMask(RHS.Conditions.Features); }); // If the list contains multiple 'default' versions, such as when it contains // 'pentium' and 'generic', don't emit the call to the generic one (since we // always run on at least a 'pentium'). We do this by deleting the 'least // advanced' (read, lowest mangling letter). while (Options.size() > 1 && CodeGenFunction::GetX86CpuSupportsMask( (Options.end() - 2)->Conditions.Features) == 0) { StringRef LHSName = (Options.end() - 2)->Function->getName(); StringRef RHSName = (Options.end() - 1)->Function->getName(); if (LHSName.compare(RHSName) < 0) Options.erase(Options.end() - 2); else Options.erase(Options.end() - 1); } CodeGenFunction CGF(*this); CGF.EmitMultiVersionResolver(ResolverFunc, Options); if (getTarget().supportsIFunc()) { std::string AliasName = getMangledNameImpl( *this, GD, FD, /*OmitMultiVersionMangling=*/true); llvm::Constant *AliasFunc = GetGlobalValue(AliasName); if (!AliasFunc) { auto *IFunc = cast(GetOrCreateLLVMFunction( AliasName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/true, /*IsThunk=*/false, llvm::AttributeList(), NotForDefinition)); auto *GA = llvm::GlobalAlias::create( DeclTy, 0, getFunctionLinkage(GD), AliasName, IFunc, &getModule()); GA->setLinkage(llvm::Function::WeakODRLinkage); SetCommonAttributes(GD, GA); } } } /// If a dispatcher for the specified mangled name is not in the module, create /// and return an llvm Function with the specified type. llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver( GlobalDecl GD, llvm::Type *DeclTy, const FunctionDecl *FD) { std::string MangledName = getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true); // Holds the name of the resolver, in ifunc mode this is the ifunc (which has // a separate resolver). 
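  // Illustrative sketch (editorial addition, not part of the original patch):
  // for a hypothetical target-multiversioned function
  //   __attribute__((target("default"))) int foo();
  //   __attribute__((target("avx2")))    int foo();
  // an ifunc-capable target exposes "<mangled-name>.ifunc", backed by a
  // separate "<mangled-name>.resolver" function, while targets without ifunc
  // support use the ".resolver" (or plain mangled) name directly, matching
  // the suffix logic just below.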
std::string ResolverName = MangledName; if (getTarget().supportsIFunc()) ResolverName += ".ifunc"; else if (FD->isTargetMultiVersion()) ResolverName += ".resolver"; // If this already exists, just return that one. if (llvm::GlobalValue *ResolverGV = GetGlobalValue(ResolverName)) return ResolverGV; // Since this is the first time we've created this IFunc, make sure // that we put this multiversioned function into the list to be // replaced later if necessary (target multiversioning only). if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion()) MultiVersionFuncs.push_back(GD); if (getTarget().supportsIFunc()) { llvm::Type *ResolverType = llvm::FunctionType::get( llvm::PointerType::get( DeclTy, getContext().getTargetAddressSpace(FD->getType())), false); llvm::Constant *Resolver = GetOrCreateLLVMFunction( MangledName + ".resolver", ResolverType, GlobalDecl{}, /*ForVTable=*/false); llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create( DeclTy, 0, llvm::Function::WeakODRLinkage, "", Resolver, &getModule()); GIF->setName(ResolverName); SetCommonAttributes(FD, GIF); return GIF; } llvm::Constant *Resolver = GetOrCreateLLVMFunction( ResolverName, DeclTy, GlobalDecl{}, /*ForVTable=*/false); assert(isa(Resolver) && "Resolver should be created for the first time"); SetCommonAttributes(FD, cast(Resolver)); return Resolver; } /// GetOrCreateLLVMFunction - If the specified mangled name is not in the /// module, create and return an llvm Function with the specified type. If there /// is something in the module with the specified name, return it potentially /// bitcasted to the right type. /// /// If D is non-null, it specifies a decl that correspond to this. This is used /// to set the attributes on the function when it is first created. llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( StringRef MangledName, llvm::Type *Ty, GlobalDecl GD, bool ForVTable, bool DontDefer, bool IsThunk, llvm::AttributeList ExtraAttrs, ForDefinition_t IsForDefinition) { const Decl *D = GD.getDecl(); // Any attempts to use a MultiVersion function should result in retrieving // the iFunc instead. Name Mangling will handle the rest of the changes. if (const FunctionDecl *FD = cast_or_null(D)) { // For the device mark the function as one that should be emitted. if (getLangOpts().OpenMPIsDevice && OpenMPRuntime && !OpenMPRuntime->markAsGlobalTarget(GD) && FD->isDefined() && !DontDefer && !IsForDefinition) { if (const FunctionDecl *FDDef = FD->getDefinition()) { GlobalDecl GDDef; if (const auto *CD = dyn_cast(FDDef)) GDDef = GlobalDecl(CD, GD.getCtorType()); else if (const auto *DD = dyn_cast(FDDef)) GDDef = GlobalDecl(DD, GD.getDtorType()); else GDDef = GlobalDecl(FDDef); EmitGlobal(GDDef); } } if (FD->isMultiVersion()) { if (FD->hasAttr()) UpdateMultiVersionNames(GD, FD); if (!IsForDefinition) return GetOrCreateMultiVersionResolver(GD, Ty, FD); } } // Lookup the entry, lazily creating it if necessary. llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (Entry) { if (WeakRefReferences.erase(Entry)) { const FunctionDecl *FD = cast_or_null(D); if (FD && !FD->hasAttr()) Entry->setLinkage(llvm::Function::ExternalLinkage); } // Handle dropped DLL attributes. if (D && !D->hasAttr() && !D->hasAttr()) { Entry->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); setDSOLocal(Entry); } // If there are two attempts to define the same mangled name, issue an // error. 
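    // Illustrative sketch (editorial addition, not part of the original
    // patch; the code below is hypothetical): two declarations can collide on
    // one mangled name through asm labels, e.g.
    //   int f() __asm__("entry"); int f() { return 0; }
    //   int g() __asm__("entry"); int g() { return 1; }
    // The second definition is what the err_duplicate_mangled_name diagnostic
    // below reports.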
if (IsForDefinition && !Entry->isDeclaration()) { GlobalDecl OtherGD; // Check that GD is not yet in DiagnosedConflictingDefinitions is required // to make sure that we issue an error only once. if (lookupRepresentativeDecl(MangledName, OtherGD) && (GD.getCanonicalDecl().getDecl() != OtherGD.getCanonicalDecl().getDecl()) && DiagnosedConflictingDefinitions.insert(GD).second) { getDiags().Report(D->getLocation(), diag::err_duplicate_mangled_name) << MangledName; getDiags().Report(OtherGD.getDecl()->getLocation(), diag::note_previous_definition); } } if ((isa(Entry) || isa(Entry)) && (Entry->getValueType() == Ty)) { return Entry; } // Make sure the result is of the correct type. // (If function is requested for a definition, we always need to create a new // function, not just return a bitcast.) if (!IsForDefinition) return llvm::ConstantExpr::getBitCast(Entry, Ty->getPointerTo()); } // This function doesn't have a complete type (for example, the return // type is an incomplete struct). Use a fake type instead, and make // sure not to try to set attributes. bool IsIncompleteFunction = false; llvm::FunctionType *FTy; if (isa(Ty)) { FTy = cast(Ty); } else { FTy = llvm::FunctionType::get(VoidTy, false); IsIncompleteFunction = true; } llvm::Function *F = llvm::Function::Create(FTy, llvm::Function::ExternalLinkage, Entry ? StringRef() : MangledName, &getModule()); // If we already created a function with the same mangled name (but different // type) before, take its name and add it to the list of functions to be // replaced with F at the end of CodeGen. // // This happens if there is a prototype for a function (e.g. "int f()") and // then a definition of a different type (e.g. "int f(int x)"). if (Entry) { F->takeName(Entry); // This might be an implementation of a function without a prototype, in // which case, try to do special replacement of calls which match the new // prototype. The really key thing here is that we also potentially drop // arguments from the call site so as to make a direct call, which makes the // inliner happier and suppresses a number of optimizer warnings (!) about // dropping arguments. if (!Entry->use_empty()) { ReplaceUsesOfNonProtoTypeWithRealFunction(Entry, F); Entry->removeDeadConstantUsers(); } llvm::Constant *BC = llvm::ConstantExpr::getBitCast( F, Entry->getValueType()->getPointerTo()); addGlobalValReplacement(Entry, BC); } assert(F->getName() == MangledName && "name was uniqued!"); if (D) SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk); if (ExtraAttrs.hasAttributes(llvm::AttributeList::FunctionIndex)) { llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeList::FunctionIndex); F->addAttributes(llvm::AttributeList::FunctionIndex, B); } if (!DontDefer) { // All MSVC dtors other than the base dtor are linkonce_odr and delegate to // each other bottoming out with the base dtor. Therefore we emit non-base // dtors on usage, even if there is no dtor definition in the TU. if (D && isa(D) && getCXXABI().useThunkForDtorVariant(cast(D), GD.getDtorType())) addDeferredDeclToEmit(GD); // This is the first use or definition of a mangled name. If there is a // deferred decl with this name, remember that we need to emit it at the end // of the file. auto DDI = DeferredDecls.find(MangledName); if (DDI != DeferredDecls.end()) { // Move the potentially referenced deferred decl to the // DeferredDeclsToEmit list, and remove it from DeferredDecls (since we // don't need it anymore). 
addDeferredDeclToEmit(DDI->second); DeferredDecls.erase(DDI); // Otherwise, there are cases we have to worry about where we're // using a declaration for which we must emit a definition but where // we might not find a top-level definition: // - member functions defined inline in their classes // - friend functions defined inline in some class // - special member functions with implicit definitions // If we ever change our AST traversal to walk into class methods, // this will be unnecessary. // // We also don't emit a definition for a function if it's going to be an // entry in a vtable, unless it's already marked as used. } else if (getLangOpts().CPlusPlus && D) { // Look for a declaration that's lexically in a record. for (const auto *FD = cast(D)->getMostRecentDecl(); FD; FD = FD->getPreviousDecl()) { if (isa(FD->getLexicalDeclContext())) { if (FD->doesThisDeclarationHaveABody()) { addDeferredDeclToEmit(GD.getWithDecl(FD)); break; } } } } } // Make sure the result is of the requested type. if (!IsIncompleteFunction) { assert(F->getFunctionType() == Ty); return F; } llvm::Type *PTy = llvm::PointerType::getUnqual(Ty); return llvm::ConstantExpr::getBitCast(F, PTy); } /// GetAddrOfFunction - Return the address of the given function. If Ty is /// non-null, then this function will use the specified type if it has to /// create it (this occurs when we see a definition of the function). llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty, bool ForVTable, bool DontDefer, ForDefinition_t IsForDefinition) { assert(!cast(GD.getDecl())->isConsteval() && "consteval function should never be emitted"); // If there was no specific requested type, just convert it now. if (!Ty) { const auto *FD = cast(GD.getDecl()); Ty = getTypes().ConvertType(FD->getType()); } // Devirtualized destructor calls may come through here instead of via // getAddrOfCXXStructor. Make sure we use the MS ABI base destructor instead // of the complete destructor when necessary. if (const auto *DD = dyn_cast(GD.getDecl())) { if (getTarget().getCXXABI().isMicrosoft() && GD.getDtorType() == Dtor_Complete && DD->getParent()->getNumVBases() == 0) GD = GlobalDecl(DD, Dtor_Base); } StringRef MangledName = getMangledName(GD); return GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable, DontDefer, /*IsThunk=*/false, llvm::AttributeList(), IsForDefinition); } static const FunctionDecl * GetRuntimeFunctionDecl(ASTContext &C, StringRef Name) { TranslationUnitDecl *TUDecl = C.getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); IdentifierInfo &CII = C.Idents.get(Name); for (const auto &Result : DC->lookup(&CII)) if (const auto FD = dyn_cast(Result)) return FD; if (!C.getLangOpts().CPlusPlus) return nullptr; // Demangle the premangled name from getTerminateFn() IdentifierInfo &CXXII = (Name == "_ZSt9terminatev" || Name == "?terminate@@YAXXZ") ? C.Idents.get("terminate") : C.Idents.get(Name); for (const auto &N : {"__cxxabiv1", "std"}) { IdentifierInfo &NS = C.Idents.get(N); for (const auto &Result : DC->lookup(&NS)) { NamespaceDecl *ND = dyn_cast(Result); if (auto LSD = dyn_cast(Result)) for (const auto &Result : LSD->lookup(&NS)) if ((ND = dyn_cast(Result))) break; if (ND) for (const auto &Result : ND->lookup(&CXXII)) if (const auto *FD = dyn_cast(Result)) return FD; } } return nullptr; } /// CreateRuntimeFunction - Create a new runtime function with the specified /// type and name. 
llvm::FunctionCallee
CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name,
                                     llvm::AttributeList ExtraAttrs, bool Local,
                                     bool AssumeConvergent) {
  if (AssumeConvergent) {
    ExtraAttrs =
        ExtraAttrs.addAttribute(VMContext, llvm::AttributeList::FunctionIndex,
                                llvm::Attribute::Convergent);
  }

  llvm::Constant *C =
      GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false,
                              /*DontDefer=*/false, /*IsThunk=*/false,
                              ExtraAttrs);

  if (auto *F = dyn_cast<llvm::Function>(C)) {
    if (F->empty()) {
      F->setCallingConv(getRuntimeCC());

      // In Windows Itanium environments, try to mark runtime functions
      // dllimport. For Mingw and MSVC, don't. We don't really know if the user
      // will link their standard library statically or dynamically. Marking
      // functions imported when they are not imported can cause linker errors
      // and warnings.
      if (!Local && getTriple().isWindowsItaniumEnvironment() &&
          !getCodeGenOpts().LTOVisibilityPublicStd) {
        const FunctionDecl *FD = GetRuntimeFunctionDecl(Context, Name);
        if (!FD || FD->hasAttr<DLLImportAttr>()) {
          F->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
          F->setLinkage(llvm::GlobalValue::ExternalLinkage);
        }
      }
      setDSOLocal(F);
    }
  }

  return {FTy, C};
}

/// isTypeConstant - Determine whether an object of this type can be emitted
/// as a constant.
///
/// If ExcludeCtor is true, the duration when the object's constructor runs
/// will not be considered. The caller will need to verify that the object is
/// not written to during its construction.
bool CodeGenModule::isTypeConstant(QualType Ty, bool ExcludeCtor) {
  if (!Ty.isConstant(Context) && !Ty->isReferenceType())
    return false;

  if (Context.getLangOpts().CPlusPlus) {
    if (const CXXRecordDecl *Record
          = Context.getBaseElementType(Ty)->getAsCXXRecordDecl())
      return ExcludeCtor && !Record->hasMutableFields() &&
             Record->hasTrivialDestructor();
  }

  return true;
}

/// GetOrCreateLLVMGlobal - If the specified mangled name is not in the module,
/// create and return an llvm GlobalVariable with the specified type. If there
/// is something in the module with the specified name, return it potentially
/// bitcasted to the right type.
///
/// If D is non-null, it specifies a decl that corresponds to this. This is
/// used to set the attributes on the global when it is first created.
///
/// If IsForDefinition is true, it is guaranteed that an actual global with
/// type Ty will be returned, not conversion of a variable with the same
/// mangled name but some other type.
llvm::Constant *
CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
                                     llvm::PointerType *Ty,
                                     const VarDecl *D,
                                     ForDefinition_t IsForDefinition) {
  // Lookup the entry, lazily creating it if necessary.
  llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
  if (Entry) {
    if (WeakRefReferences.erase(Entry)) {
      if (D && !D->hasAttr<WeakAttr>())
        Entry->setLinkage(llvm::Function::ExternalLinkage);
    }

    // Handle dropped DLL attributes.
    if (D && !D->hasAttr<DLLImportAttr>() && !D->hasAttr<DLLExportAttr>())
      Entry->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);

    if (LangOpts.OpenMP && !LangOpts.OpenMPSimd && D)
      getOpenMPRuntime().registerTargetGlobalVariable(D, Entry);

    if (Entry->getType() == Ty)
      return Entry;

    // If there are two attempts to define the same mangled name, issue an
    // error.
    if (IsForDefinition && !Entry->isDeclaration()) {
      GlobalDecl OtherGD;
      const VarDecl *OtherD;

      // Checking that D is not yet in DiagnosedConflictingDefinitions is
      // required to make sure that we issue an error only once.
if (D && lookupRepresentativeDecl(MangledName, OtherGD) && (D->getCanonicalDecl() != OtherGD.getCanonicalDecl().getDecl()) && (OtherD = dyn_cast(OtherGD.getDecl())) && OtherD->hasInit() && DiagnosedConflictingDefinitions.insert(D).second) { getDiags().Report(D->getLocation(), diag::err_duplicate_mangled_name) << MangledName; getDiags().Report(OtherGD.getDecl()->getLocation(), diag::note_previous_definition); } } // Make sure the result is of the correct type. if (Entry->getType()->getAddressSpace() != Ty->getAddressSpace()) return llvm::ConstantExpr::getAddrSpaceCast(Entry, Ty); // (If global is requested for a definition, we always need to create a new // global, not just return a bitcast.) if (!IsForDefinition) return llvm::ConstantExpr::getBitCast(Entry, Ty); } auto AddrSpace = GetGlobalVarAddressSpace(D); auto TargetAddrSpace = getContext().getTargetAddressSpace(AddrSpace); auto *GV = new llvm::GlobalVariable( getModule(), Ty->getElementType(), false, llvm::GlobalValue::ExternalLinkage, nullptr, MangledName, nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAddrSpace); // If we already created a global with the same mangled name (but different // type) before, take its name and remove it from its parent. if (Entry) { GV->takeName(Entry); if (!Entry->use_empty()) { llvm::Constant *NewPtrForOldDecl = llvm::ConstantExpr::getBitCast(GV, Entry->getType()); Entry->replaceAllUsesWith(NewPtrForOldDecl); } Entry->eraseFromParent(); } // This is the first use or definition of a mangled name. If there is a // deferred decl with this name, remember that we need to emit it at the end // of the file. auto DDI = DeferredDecls.find(MangledName); if (DDI != DeferredDecls.end()) { // Move the potentially referenced deferred decl to the DeferredDeclsToEmit // list, and remove it from DeferredDecls (since we don't need it anymore). addDeferredDeclToEmit(DDI->second); DeferredDecls.erase(DDI); } // Handle things which are present even on external declarations. if (D) { if (LangOpts.OpenMP && !LangOpts.OpenMPSimd) getOpenMPRuntime().registerTargetGlobalVariable(D, GV); // FIXME: This code is overly simple and should be merged with other global // handling. GV->setConstant(isTypeConstant(D->getType(), false)); GV->setAlignment(getContext().getDeclAlign(D).getAsAlign()); setLinkageForGV(GV, D); if (D->getTLSKind()) { if (D->getTLSKind() == VarDecl::TLS_Dynamic) CXXThreadLocals.push_back(D); setTLSMode(GV, *D); } setGVProperties(GV, D); // If required by the ABI, treat declarations of static data members with // inline initializers as definitions. if (getContext().isMSStaticDataMemberInlineDefinition(D)) { EmitGlobalVarDefinition(D); } // Emit section information for extern variables. if (D->hasExternalStorage()) { if (const SectionAttr *SA = D->getAttr()) GV->setSection(SA->getName()); } // Handle XCore specific ABI requirements. if (getTriple().getArch() == llvm::Triple::xcore && D->getLanguageLinkage() == CLanguageLinkage && D->getType().isConstant(Context) && isExternallyVisible(D->getLinkageAndVisibility().getLinkage())) GV->setSection(".cp.rodata"); // Check if we a have a const declaration with an initializer, we may be // able to emit it as available_externally to expose it's value to the // optimizer. 
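    // Illustrative sketch (editorial addition, not part of the original
    // patch; the class is hypothetical):
    //   struct Widget { static const int Capacity = 64; };
    // A TU that only references Widget::Capacity (whose definition lives in
    // another TU) can still emit it as an available_externally constant
    // initialized to 64, letting the optimizer fold loads of it.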
if (Context.getLangOpts().CPlusPlus && GV->hasExternalLinkage() && D->getType().isConstQualified() && !GV->hasInitializer() && !D->hasDefinition() && D->hasInit() && !D->hasAttr()) { const auto *Record = Context.getBaseElementType(D->getType())->getAsCXXRecordDecl(); bool HasMutableFields = Record && Record->hasMutableFields(); if (!HasMutableFields) { const VarDecl *InitDecl; const Expr *InitExpr = D->getAnyInitializer(InitDecl); if (InitExpr) { ConstantEmitter emitter(*this); llvm::Constant *Init = emitter.tryEmitForInitializer(*InitDecl); if (Init) { auto *InitType = Init->getType(); if (GV->getValueType() != InitType) { // The type of the initializer does not match the definition. // This happens when an initializer has a different type from // the type of the global (because of padding at the end of a // structure for instance). GV->setName(StringRef()); // Make a new global with the correct type, this is now guaranteed // to work. auto *NewGV = cast( GetAddrOfGlobalVar(D, InitType, IsForDefinition) ->stripPointerCasts()); // Erase the old global, since it is no longer used. GV->eraseFromParent(); GV = NewGV; } else { GV->setInitializer(Init); GV->setConstant(true); GV->setLinkage(llvm::GlobalValue::AvailableExternallyLinkage); } emitter.finalize(GV); } } } } } if (GV->isDeclaration()) getTargetCodeGenInfo().setTargetAttributes(D, GV, *this); LangAS ExpectedAS = D ? D->getType().getAddressSpace() : (LangOpts.OpenCL ? LangAS::opencl_global : LangAS::Default); assert(getContext().getTargetAddressSpace(ExpectedAS) == Ty->getPointerAddressSpace()); if (AddrSpace != ExpectedAS) return getTargetCodeGenInfo().performAddrSpaceCast(*this, GV, AddrSpace, ExpectedAS, Ty); return GV; } llvm::Constant * CodeGenModule::GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition) { const Decl *D = GD.getDecl(); if (isa(D) || isa(D)) return getAddrOfCXXStructor(GD, /*FnInfo=*/nullptr, /*FnType=*/nullptr, /*DontDefer=*/false, IsForDefinition); if (isa(D)) { auto FInfo = &getTypes().arrangeCXXMethodDeclaration(cast(D)); auto Ty = getTypes().GetFunctionType(*FInfo); return GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/false, IsForDefinition); } if (isa(D)) { const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); return GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/false, IsForDefinition); } return GetAddrOfGlobalVar(cast(D), /*Ty=*/nullptr, IsForDefinition); } llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable( StringRef Name, llvm::Type *Ty, llvm::GlobalValue::LinkageTypes Linkage, unsigned Alignment) { llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name); llvm::GlobalVariable *OldGV = nullptr; if (GV) { // Check if the variable has the right type. if (GV->getValueType() == Ty) return GV; // Because C++ name mangling, the only way we can end up with an already // existing global with the same name is if it has been declared extern "C". assert(GV->isDeclaration() && "Declaration has wrong type!"); OldGV = GV; } // Create a new variable. GV = new llvm::GlobalVariable(getModule(), Ty, /*isConstant=*/true, Linkage, nullptr, Name); if (OldGV) { // Replace occurrences of the old variable if needed. 
GV->takeName(OldGV); if (!OldGV->use_empty()) { llvm::Constant *NewPtrForOldDecl = llvm::ConstantExpr::getBitCast(GV, OldGV->getType()); OldGV->replaceAllUsesWith(NewPtrForOldDecl); } OldGV->eraseFromParent(); } if (supportsCOMDAT() && GV->isWeakForLinker() && !GV->hasAvailableExternallyLinkage()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); GV->setAlignment(llvm::MaybeAlign(Alignment)); return GV; } /// GetAddrOfGlobalVar - Return the llvm::Constant for the address of the /// given global variable. If Ty is non-null and if the global doesn't exist, /// then it will be created with the specified type instead of whatever the /// normal requested type would be. If IsForDefinition is true, it is guaranteed /// that an actual global with type Ty will be returned, not conversion of a /// variable with the same mangled name but some other type. llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D, llvm::Type *Ty, ForDefinition_t IsForDefinition) { assert(D->hasGlobalStorage() && "Not a global variable"); QualType ASTTy = D->getType(); if (!Ty) Ty = getTypes().ConvertTypeForMem(ASTTy); llvm::PointerType *PTy = llvm::PointerType::get(Ty, getContext().getTargetAddressSpace(ASTTy)); StringRef MangledName = getMangledName(D); return GetOrCreateLLVMGlobal(MangledName, PTy, D, IsForDefinition); } /// CreateRuntimeVariable - Create a new runtime global variable with the /// specified type and name. llvm::Constant * CodeGenModule::CreateRuntimeVariable(llvm::Type *Ty, StringRef Name) { auto PtrTy = getContext().getLangOpts().OpenCL ? llvm::PointerType::get( Ty, getContext().getTargetAddressSpace(LangAS::opencl_global)) : llvm::PointerType::getUnqual(Ty); auto *Ret = GetOrCreateLLVMGlobal(Name, PtrTy, nullptr); setDSOLocal(cast(Ret->stripPointerCasts())); return Ret; } void CodeGenModule::EmitTentativeDefinition(const VarDecl *D) { assert(!D->getInit() && "Cannot emit definite definitions here!"); StringRef MangledName = getMangledName(D); llvm::GlobalValue *GV = GetGlobalValue(MangledName); // We already have a definition, not declaration, with the same mangled name. // Emitting of declaration is not required (and actually overwrites emitted // definition). if (GV && !GV->isDeclaration()) return; // If we have not seen a reference to this variable yet, place it into the // deferred declarations table to be emitted if needed later. if (!MustBeEmitted(D) && !GV) { DeferredDecls[MangledName] = D; return; } // The tentative definition is the only definition. EmitGlobalVarDefinition(D); } void CodeGenModule::EmitExternalDeclaration(const VarDecl *D) { EmitExternalVarDeclaration(D); } CharUnits CodeGenModule::GetTargetTypeStoreSize(llvm::Type *Ty) const { return Context.toCharUnitsFromBits( getDataLayout().getTypeStoreSizeInBits(Ty)); } LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) { LangAS AddrSpace = LangAS::Default; if (LangOpts.OpenCL) { AddrSpace = D ? 
D->getType().getAddressSpace() : LangAS::opencl_global; assert(AddrSpace == LangAS::opencl_global || AddrSpace == LangAS::opencl_global_device || AddrSpace == LangAS::opencl_global_host || AddrSpace == LangAS::opencl_constant || AddrSpace == LangAS::opencl_local || AddrSpace >= LangAS::FirstTargetAddressSpace); return AddrSpace; } if (LangOpts.CUDA && LangOpts.CUDAIsDevice) { if (D && D->hasAttr()) return LangAS::cuda_constant; else if (D && D->hasAttr()) return LangAS::cuda_shared; else if (D && D->hasAttr()) return LangAS::cuda_device; else if (D && D->getType().isConstQualified()) return LangAS::cuda_constant; else return LangAS::cuda_device; } if (LangOpts.OpenMP) { LangAS AS; if (OpenMPRuntime->hasAllocateAttributeForGlobalVar(D, AS)) return AS; } return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D); } LangAS CodeGenModule::getStringLiteralAddressSpace() const { // OpenCL v1.2 s6.5.3: a string literal is in the constant address space. if (LangOpts.OpenCL) return LangAS::opencl_constant; if (auto AS = getTarget().getConstantAddressSpace()) return AS.getValue(); return LangAS::Default; } // In address space agnostic languages, string literals are in default address // space in AST. However, certain targets (e.g. amdgcn) request them to be // emitted in constant address space in LLVM IR. To be consistent with other // parts of AST, string literal global variables in constant address space // need to be casted to default address space before being put into address // map and referenced by other part of CodeGen. // In OpenCL, string literals are in constant address space in AST, therefore // they should not be casted to default address space. static llvm::Constant * castStringLiteralToDefaultAddressSpace(CodeGenModule &CGM, llvm::GlobalVariable *GV) { llvm::Constant *Cast = GV; if (!CGM.getLangOpts().OpenCL) { if (auto AS = CGM.getTarget().getConstantAddressSpace()) { if (AS != LangAS::Default) Cast = CGM.getTargetCodeGenInfo().performAddrSpaceCast( CGM, GV, AS.getValue(), LangAS::Default, GV->getValueType()->getPointerTo( CGM.getContext().getTargetAddressSpace(LangAS::Default))); } } return Cast; } template void CodeGenModule::MaybeHandleStaticInExternC(const SomeDecl *D, llvm::GlobalValue *GV) { if (!getLangOpts().CPlusPlus) return; // Must have 'used' attribute, or else inline assembly can't rely on // the name existing. if (!D->template hasAttr()) return; // Must have internal linkage and an ordinary name. if (!D->getIdentifier() || D->getFormalLinkage() != InternalLinkage) return; // Must be in an extern "C" context. Entities declared directly within // a record are not extern "C" even if the record is in such a context. const SomeDecl *First = D->getFirstDecl(); if (First->getDeclContext()->isRecord() || !First->isInExternCContext()) return; // OK, this is an internal linkage entity inside an extern "C" linkage // specification. Make a note of that so we can give it the "expected" // mangled name if nothing else is using that name. std::pair R = StaticExternCValues.insert(std::make_pair(D->getIdentifier(), GV)); // If we have multiple internal linkage entities with the same name // in extern "C" regions, none of them gets that name. if (!R.second) R.first->second = nullptr; } static bool shouldBeInCOMDAT(CodeGenModule &CGM, const Decl &D) { if (!CGM.supportsCOMDAT()) return false; // Do not set COMDAT attribute for CUDA/HIP stub functions to prevent // them being "merged" by the COMDAT Folding linker optimization. 
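  // Illustrative sketch (editorial addition, not part of the original patch;
  // the kernels are hypothetical):
  //   __global__ void kernelA() {}
  //   __global__ void kernelB() {}
  // Their host-side stubs are byte-identical; if they were placed in COMDATs,
  // identical-code folding could collapse them into a single symbol and both
  // launches would then name the same kernel, hence the early return below.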
if (D.hasAttr()) return false; if (D.hasAttr()) return true; GVALinkage Linkage; if (auto *VD = dyn_cast(&D)) Linkage = CGM.getContext().GetGVALinkageForVariable(VD); else Linkage = CGM.getContext().GetGVALinkageForFunction(cast(&D)); switch (Linkage) { case GVA_Internal: case GVA_AvailableExternally: case GVA_StrongExternal: return false; case GVA_DiscardableODR: case GVA_StrongODR: return true; } llvm_unreachable("No such linkage"); } void CodeGenModule::maybeSetTrivialComdat(const Decl &D, llvm::GlobalObject &GO) { if (!shouldBeInCOMDAT(*this, D)) return; GO.setComdat(TheModule.getOrInsertComdat(GO.getName())); } /// Pass IsTentative as true if you want to create a tentative definition. void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, bool IsTentative) { // OpenCL global variables of sampler type are translated to function calls, // therefore no need to be translated. QualType ASTTy = D->getType(); if (getLangOpts().OpenCL && ASTTy->isSamplerT()) return; // If this is OpenMP device, check if it is legal to emit this global // normally. if (LangOpts.OpenMPIsDevice && OpenMPRuntime && OpenMPRuntime->emitTargetGlobalVariable(D)) return; llvm::Constant *Init = nullptr; bool NeedsGlobalCtor = false; bool NeedsGlobalDtor = D->needsDestruction(getContext()) == QualType::DK_cxx_destructor; const VarDecl *InitDecl; const Expr *InitExpr = D->getAnyInitializer(InitDecl); Optional emitter; // CUDA E.2.4.1 "__shared__ variables cannot have an initialization // as part of their declaration." Sema has already checked for // error cases, so we just need to set Init to UndefValue. bool IsCUDASharedVar = getLangOpts().CUDAIsDevice && D->hasAttr(); // Shadows of initialized device-side global variables are also left // undefined. bool IsCUDAShadowVar = !getLangOpts().CUDAIsDevice && (D->hasAttr() || D->hasAttr() || D->hasAttr()); bool IsCUDADeviceShadowVar = getLangOpts().CUDAIsDevice && (D->getType()->isCUDADeviceBuiltinSurfaceType() || D->getType()->isCUDADeviceBuiltinTextureType()); // HIP pinned shadow of initialized host-side global variables are also // left undefined. if (getLangOpts().CUDA && (IsCUDASharedVar || IsCUDAShadowVar || IsCUDADeviceShadowVar)) Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); else if (D->hasAttr()) Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); else if (!InitExpr) { // This is a tentative definition; tentative definitions are // implicitly initialized with { 0 }. // // Note that tentative definitions are only emitted at the end of // a translation unit, so they should never have incomplete // type. In addition, EmitTentativeDefinition makes sure that we // never attempt to emit a tentative definition if a real one // exists. A use may still exists, however, so we still may need // to do a RAUW. assert(!ASTTy->isIncompleteType() && "Unexpected incomplete type"); Init = EmitNullConstant(D->getType()); } else { initializedGlobalDecl = GlobalDecl(D); emitter.emplace(*this); Init = emitter->tryEmitForInitializer(*InitDecl); if (!Init) { QualType T = InitExpr->getType(); if (D->getType()->isReferenceType()) T = D->getType(); if (getLangOpts().CPlusPlus) { Init = EmitNullConstant(T); NeedsGlobalCtor = true; } else { ErrorUnsupported(D, "static initializer"); Init = llvm::UndefValue::get(getTypes().ConvertType(T)); } } else { // We don't need an initializer, so remove the entry for the delayed // initializer position (just in case this entry was delayed) if we // also don't need to register a destructor. 
if (getLangOpts().CPlusPlus && !NeedsGlobalDtor) DelayedCXXInitPosition.erase(D); } } llvm::Type* InitType = Init->getType(); llvm::Constant *Entry = GetAddrOfGlobalVar(D, InitType, ForDefinition_t(!IsTentative)); // Strip off pointer casts if we got them. Entry = Entry->stripPointerCasts(); // Entry is now either a Function or GlobalVariable. auto *GV = dyn_cast(Entry); // We have a definition after a declaration with the wrong type. // We must make a new GlobalVariable* and update everything that used OldGV // (a declaration or tentative definition) with the new GlobalVariable* // (which will be a definition). // // This happens if there is a prototype for a global (e.g. // "extern int x[];") and then a definition of a different type (e.g. // "int x[10];"). This also happens when an initializer has a different type // from the type of the global (this happens with unions). if (!GV || GV->getValueType() != InitType || GV->getType()->getAddressSpace() != getContext().getTargetAddressSpace(GetGlobalVarAddressSpace(D))) { // Move the old entry aside so that we'll create a new one. Entry->setName(StringRef()); // Make a new global with the correct type, this is now guaranteed to work. GV = cast( GetAddrOfGlobalVar(D, InitType, ForDefinition_t(!IsTentative)) ->stripPointerCasts()); // Replace all uses of the old global with the new global llvm::Constant *NewPtrForOldDecl = llvm::ConstantExpr::getBitCast(GV, Entry->getType()); Entry->replaceAllUsesWith(NewPtrForOldDecl); // Erase the old global, since it is no longer used. cast(Entry)->eraseFromParent(); } MaybeHandleStaticInExternC(D, GV); if (D->hasAttr()) AddGlobalAnnotations(D, GV); // Set the llvm linkage type as appropriate. llvm::GlobalValue::LinkageTypes Linkage = getLLVMLinkageVarDefinition(D, GV->isConstant()); // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on // the device. [...]" // CUDA B.2.2 "The __constant__ qualifier, optionally used together with // __device__, declares a variable that: [...] // Is accessible from all the threads within the grid and from the host // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize() // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())." if (GV && LangOpts.CUDA) { if (LangOpts.CUDAIsDevice) { if (Linkage != llvm::GlobalValue::InternalLinkage && (D->hasAttr() || D->hasAttr())) GV->setExternallyInitialized(true); } else { // Host-side shadows of external declarations of device-side // global variables become internal definitions. These have to // be internal in order to prevent name conflicts with global // host variables with the same name in a different TUs. if (D->hasAttr() || D->hasAttr()) { Linkage = llvm::GlobalValue::InternalLinkage; // Shadow variables and their properties must be registered with CUDA // runtime. Skip Extern global variables, which will be registered in // the TU where they are defined. if (!D->hasExternalStorage()) getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(), D->hasAttr()); } else if (D->hasAttr()) { // __shared__ variables are odd. Shadows do get created, but // they are not registered with the CUDA runtime, so they // can't really be used to access their device-side // counterparts. It's not clear yet whether it's nvcc's bug or // a feature, but we've got to do the same for compatibility. 
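        // Illustrative sketch (editorial addition, not part of the original
        // patch; the variable is hypothetical):
        //   __shared__ float Tile[256];
        // The host still gets an internal shadow for Tile, but unlike
        // __device__/__constant__ variables it is not registered with the
        // CUDA runtime, so the shadow cannot be used to reach the device copy.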
Linkage = llvm::GlobalValue::InternalLinkage; } else if (D->getType()->isCUDADeviceBuiltinSurfaceType() || D->getType()->isCUDADeviceBuiltinTextureType()) { // Builtin surfaces and textures and their template arguments are // also registered with CUDA runtime. Linkage = llvm::GlobalValue::InternalLinkage; const ClassTemplateSpecializationDecl *TD = cast( D->getType()->getAs()->getDecl()); const TemplateArgumentList &Args = TD->getTemplateArgs(); if (TD->hasAttr()) { assert(Args.size() == 2 && "Unexpected number of template arguments of CUDA device " "builtin surface type."); auto SurfType = Args[1].getAsIntegral(); if (!D->hasExternalStorage()) getCUDARuntime().registerDeviceSurf(D, *GV, !D->hasDefinition(), SurfType.getSExtValue()); } else { assert(Args.size() == 3 && "Unexpected number of template arguments of CUDA device " "builtin texture type."); auto TexType = Args[1].getAsIntegral(); auto Normalized = Args[2].getAsIntegral(); if (!D->hasExternalStorage()) getCUDARuntime().registerDeviceTex(D, *GV, !D->hasDefinition(), TexType.getSExtValue(), Normalized.getZExtValue()); } } } } GV->setInitializer(Init); if (emitter) emitter->finalize(GV); // If it is safe to mark the global 'constant', do so now. GV->setConstant(!NeedsGlobalCtor && !NeedsGlobalDtor && isTypeConstant(D->getType(), true)); // If it is in a read-only section, mark it 'constant'. if (const SectionAttr *SA = D->getAttr()) { const ASTContext::SectionInfo &SI = Context.SectionInfos[SA->getName()]; if ((SI.SectionFlags & ASTContext::PSF_Write) == 0) GV->setConstant(true); } GV->setAlignment(getContext().getDeclAlign(D).getAsAlign()); // On Darwin, unlike other Itanium C++ ABI platforms, the thread-wrapper // function is only defined alongside the variable, not also alongside // callers. Normally, all accesses to a thread_local go through the // thread-wrapper in order to ensure initialization has occurred, underlying // variable will never be used other than the thread-wrapper, so it can be // converted to internal linkage. // // However, if the variable has the 'constinit' attribute, it _can_ be // referenced directly, without calling the thread-wrapper, so the linkage // must not be changed. // // Additionally, if the variable isn't plain external linkage, e.g. if it's // weak or linkonce, the de-duplication semantics are important to preserve, // so we don't change the linkage. if (D->getTLSKind() == VarDecl::TLS_Dynamic && Linkage == llvm::GlobalValue::ExternalLinkage && Context.getTargetInfo().getTriple().isOSDarwin() && !D->hasAttr()) Linkage = llvm::GlobalValue::InternalLinkage; GV->setLinkage(Linkage); if (D->hasAttr()) GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass); else if (D->hasAttr()) GV->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass); else GV->setDLLStorageClass(llvm::GlobalVariable::DefaultStorageClass); if (Linkage == llvm::GlobalVariable::CommonLinkage) { // common vars aren't constant even if declared const. GV->setConstant(false); // Tentative definition of global variables may be initialized with // non-zero null pointers. In this case they should have weak linkage // since common linkage must have zero initializer and must not have // explicit section therefore cannot have non-zero initial value. 
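    // Illustrative sketch (editorial addition, not part of the original
    // patch): on a target whose null pointer is not all-zero bits, a
    // hypothetical tentative definition such as
    //   int *p;
    // gets a non-zero "null" initializer, so the check below downgrades it
    // from common to weak linkage.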
if (!GV->getInitializer()->isNullValue()) GV->setLinkage(llvm::GlobalVariable::WeakAnyLinkage); } setNonAliasAttributes(D, GV); if (D->getTLSKind() && !GV->isThreadLocal()) { if (D->getTLSKind() == VarDecl::TLS_Dynamic) CXXThreadLocals.push_back(D); setTLSMode(GV, *D); } maybeSetTrivialComdat(*D, *GV); // Emit the initializer function if necessary. if (NeedsGlobalCtor || NeedsGlobalDtor) EmitCXXGlobalVarDeclInitFunc(D, GV, NeedsGlobalCtor); SanitizerMD->reportGlobalToASan(GV, *D, NeedsGlobalCtor); // Emit global variable debug information. if (CGDebugInfo *DI = getModuleDebugInfo()) if (getCodeGenOpts().hasReducedDebugInfo()) DI->EmitGlobalVariable(GV, D); } void CodeGenModule::EmitExternalVarDeclaration(const VarDecl *D) { if (CGDebugInfo *DI = getModuleDebugInfo()) if (getCodeGenOpts().hasReducedDebugInfo()) { QualType ASTTy = D->getType(); llvm::Type *Ty = getTypes().ConvertTypeForMem(D->getType()); llvm::PointerType *PTy = llvm::PointerType::get(Ty, getContext().getTargetAddressSpace(ASTTy)); llvm::Constant *GV = GetOrCreateLLVMGlobal(D->getName(), PTy, D); DI->EmitExternalVariable( cast(GV->stripPointerCasts()), D); } } static bool isVarDeclStrongDefinition(const ASTContext &Context, CodeGenModule &CGM, const VarDecl *D, bool NoCommon) { // Don't give variables common linkage if -fno-common was specified unless it // was overridden by a NoCommon attribute. if ((NoCommon || D->hasAttr()) && !D->hasAttr()) return true; // C11 6.9.2/2: // A declaration of an identifier for an object that has file scope without // an initializer, and without a storage-class specifier or with the // storage-class specifier static, constitutes a tentative definition. if (D->getInit() || D->hasExternalStorage()) return true; // A variable cannot be both common and exist in a section. if (D->hasAttr()) return true; // A variable cannot be both common and exist in a section. // We don't try to determine which is the right section in the front-end. // If no specialized section name is applicable, it will resort to default. if (D->hasAttr() || D->hasAttr() || D->hasAttr() || D->hasAttr()) return true; // Thread local vars aren't considered common linkage. if (D->getTLSKind()) return true; // Tentative definitions marked with WeakImportAttr are true definitions. if (D->hasAttr()) return true; // A variable cannot be both common and exist in a comdat. if (shouldBeInCOMDAT(CGM, *D)) return true; // Declarations with a required alignment do not have common linkage in MSVC // mode. if (Context.getTargetInfo().getCXXABI().isMicrosoft()) { if (D->hasAttr()) return true; QualType VarType = D->getType(); if (Context.isAlignmentRequired(VarType)) return true; if (const auto *RT = VarType->getAs()) { const RecordDecl *RD = RT->getDecl(); for (const FieldDecl *FD : RD->fields()) { if (FD->isBitField()) continue; if (FD->hasAttr()) return true; if (Context.isAlignmentRequired(FD->getType())) return true; } } } // Microsoft's link.exe doesn't support alignments greater than 32 bytes for // common symbols, so symbols with greater alignment requirements cannot be // common. // Other COFF linkers (ld.bfd and LLD) support arbitrary power-of-two // alignments for common symbols via the aligncomm directive, so this // restriction only applies to MSVC environments. 
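  // Illustrative sketch (editorial addition, not part of the original patch;
  // the variable is hypothetical):
  //   _Alignas(64) char Scratch[4096];
  // Its 64-byte alignment exceeds the 32-byte limit checked below, so when
  // targeting an MSVC environment it is treated as a strong definition rather
  // than a common symbol.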
if (Context.getTargetInfo().getTriple().isKnownWindowsMSVCEnvironment() && Context.getTypeAlignIfKnown(D->getType()) > Context.toBits(CharUnits::fromQuantity(32))) return true; return false; } llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator( const DeclaratorDecl *D, GVALinkage Linkage, bool IsConstantVariable) { if (Linkage == GVA_Internal) return llvm::Function::InternalLinkage; if (D->hasAttr()) { if (IsConstantVariable) return llvm::GlobalVariable::WeakODRLinkage; else return llvm::GlobalVariable::WeakAnyLinkage; } if (const auto *FD = D->getAsFunction()) if (FD->isMultiVersion() && Linkage == GVA_AvailableExternally) return llvm::GlobalVariable::LinkOnceAnyLinkage; // We are guaranteed to have a strong definition somewhere else, // so we can use available_externally linkage. if (Linkage == GVA_AvailableExternally) return llvm::GlobalValue::AvailableExternallyLinkage; // Note that Apple's kernel linker doesn't support symbol // coalescing, so we need to avoid linkonce and weak linkages there. // Normally, this means we just map to internal, but for explicit // instantiations we'll map to external. // In C++, the compiler has to emit a definition in every translation unit // that references the function. We should use linkonce_odr because // a) if all references in this translation unit are optimized away, we // don't need to codegen it. b) if the function persists, it needs to be // merged with other definitions. c) C++ has the ODR, so we know the // definition is dependable. if (Linkage == GVA_DiscardableODR) return !Context.getLangOpts().AppleKext ? llvm::Function::LinkOnceODRLinkage : llvm::Function::InternalLinkage; // An explicit instantiation of a template has weak linkage, since // explicit instantiations can occur in multiple translation units // and must all be equivalent. However, we are not allowed to // throw away these explicit instantiations. // // We don't currently support CUDA device code spread out across multiple TUs, // so say that CUDA templates are either external (for kernels) or internal. // This lets llvm perform aggressive inter-procedural optimizations. if (Linkage == GVA_StrongODR) { if (Context.getLangOpts().AppleKext) return llvm::Function::ExternalLinkage; if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice) return D->hasAttr() ? llvm::Function::ExternalLinkage : llvm::Function::InternalLinkage; return llvm::Function::WeakODRLinkage; } // C++ doesn't have tentative definitions and thus cannot have common // linkage. if (!getLangOpts().CPlusPlus && isa(D) && !isVarDeclStrongDefinition(Context, *this, cast(D), CodeGenOpts.NoCommon)) return llvm::GlobalVariable::CommonLinkage; // selectany symbols are externally visible, so use weak instead of // linkonce. MSVC optimizes away references to const selectany globals, so // all definitions should be the same and ODR linkage should be used. // http://msdn.microsoft.com/en-us/library/5tkz6s71.aspx if (D->hasAttr()) return llvm::GlobalVariable::WeakODRLinkage; // Otherwise, we have strong external linkage. assert(Linkage == GVA_StrongExternal); return llvm::GlobalVariable::ExternalLinkage; } llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageVarDefinition( const VarDecl *VD, bool IsConstant) { GVALinkage Linkage = getContext().GetGVALinkageForVariable(VD); return getLLVMLinkageForDeclarator(VD, Linkage, IsConstant); } /// Replace the uses of a function that was declared with a non-proto type. 
/// We want to silently drop extra arguments from call sites static void replaceUsesOfNonProtoConstant(llvm::Constant *old, llvm::Function *newFn) { // Fast path. if (old->use_empty()) return; llvm::Type *newRetTy = newFn->getReturnType(); SmallVector newArgs; SmallVector newBundles; for (llvm::Value::use_iterator ui = old->use_begin(), ue = old->use_end(); ui != ue; ) { llvm::Value::use_iterator use = ui++; // Increment before the use is erased. llvm::User *user = use->getUser(); // Recognize and replace uses of bitcasts. Most calls to // unprototyped functions will use bitcasts. if (auto *bitcast = dyn_cast(user)) { if (bitcast->getOpcode() == llvm::Instruction::BitCast) replaceUsesOfNonProtoConstant(bitcast, newFn); continue; } // Recognize calls to the function. llvm::CallBase *callSite = dyn_cast(user); if (!callSite) continue; if (!callSite->isCallee(&*use)) continue; // If the return types don't match exactly, then we can't // transform this call unless it's dead. if (callSite->getType() != newRetTy && !callSite->use_empty()) continue; // Get the call site's attribute list. SmallVector newArgAttrs; llvm::AttributeList oldAttrs = callSite->getAttributes(); // If the function was passed too few arguments, don't transform. unsigned newNumArgs = newFn->arg_size(); if (callSite->arg_size() < newNumArgs) continue; // If extra arguments were passed, we silently drop them. // If any of the types mismatch, we don't transform. unsigned argNo = 0; bool dontTransform = false; for (llvm::Argument &A : newFn->args()) { if (callSite->getArgOperand(argNo)->getType() != A.getType()) { dontTransform = true; break; } // Add any parameter attributes. newArgAttrs.push_back(oldAttrs.getParamAttributes(argNo)); argNo++; } if (dontTransform) continue; // Okay, we can transform this. Create the new call instruction and copy // over the required information. newArgs.append(callSite->arg_begin(), callSite->arg_begin() + argNo); // Copy over any operand bundles. callSite->getOperandBundlesAsDefs(newBundles); llvm::CallBase *newCall; if (dyn_cast(callSite)) { newCall = llvm::CallInst::Create(newFn, newArgs, newBundles, "", callSite); } else { auto *oldInvoke = cast(callSite); newCall = llvm::InvokeInst::Create(newFn, oldInvoke->getNormalDest(), oldInvoke->getUnwindDest(), newArgs, newBundles, "", callSite); } newArgs.clear(); // for the next iteration if (!newCall->getType()->isVoidTy()) newCall->takeName(callSite); newCall->setAttributes(llvm::AttributeList::get( newFn->getContext(), oldAttrs.getFnAttributes(), oldAttrs.getRetAttributes(), newArgAttrs)); newCall->setCallingConv(callSite->getCallingConv()); // Finally, remove the old call, replacing any uses with the new one. if (!callSite->use_empty()) callSite->replaceAllUsesWith(newCall); // Copy debug location attached to CI. if (callSite->getDebugLoc()) newCall->setDebugLoc(callSite->getDebugLoc()); callSite->eraseFromParent(); } } /// ReplaceUsesOfNonProtoTypeWithRealFunction - This function is called when we /// implement a function with no prototype, e.g. "int foo() {}". If there are /// existing call uses of the old function in the module, this adjusts them to /// call the new function directly. /// /// This is not just a cleanup: the always_inline pass requires direct calls to /// functions to be able to inline them. If there is a bitcast in the way, it /// won't inline them. Instcombine normally deletes these calls, but it isn't /// run at -O0. 
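/// A minimal C sketch of the situation (illustrative only):
///
///   int foo();                        // declared with no prototype
///   int use(void) { return foo(1); }  // call goes through a bitcast of 'foo'
///   int foo() { return 0; }           // real definition takes no arguments
///
/// Rewriting the call to target 'foo' directly silently drops the extra argument.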
static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, llvm::Function *NewFn) { // If we're redefining a global as a function, don't transform it. if (!isa(Old)) return; replaceUsesOfNonProtoConstant(Old, NewFn); } void CodeGenModule::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) { auto DK = VD->isThisDeclarationADefinition(); if (DK == VarDecl::Definition && VD->hasAttr()) return; TemplateSpecializationKind TSK = VD->getTemplateSpecializationKind(); // If we have a definition, this might be a deferred decl. If the // instantiation is explicit, make sure we emit it at the end. if (VD->getDefinition() && TSK == TSK_ExplicitInstantiationDefinition) GetAddrOfGlobalVar(VD); EmitTopLevelDecl(VD); } void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { const auto *D = cast(GD.getDecl()); // Compute the function info and LLVM type. const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); // Get or create the prototype for the function. if (!GV || (GV->getValueType() != Ty)) GV = cast(GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/true, ForDefinition)); // Already emitted. if (!GV->isDeclaration()) return; // We need to set linkage and visibility on the function before // generating code for it because various parts of IR generation // want to propagate this information down (e.g. to local static // declarations). auto *Fn = cast(GV); setFunctionLinkage(GD, Fn); // FIXME: this is redundant with part of setFunctionDefinitionAttributes setGVProperties(Fn, GD); MaybeHandleStaticInExternC(D, Fn); maybeSetTrivialComdat(*D, *Fn); CodeGenFunction(*this).GenerateCode(GD, Fn, FI); setNonAliasAttributes(GD, Fn); SetLLVMFunctionAttributesForDefinition(D, Fn); if (const ConstructorAttr *CA = D->getAttr()) AddGlobalCtor(Fn, CA->getPriority()); if (const DestructorAttr *DA = D->getAttr()) AddGlobalDtor(Fn, DA->getPriority()); if (D->hasAttr()) AddGlobalAnnotations(D, Fn); } void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { const auto *D = cast(GD.getDecl()); const AliasAttr *AA = D->getAttr(); assert(AA && "Not an alias?"); StringRef MangledName = getMangledName(GD); if (AA->getAliasee() == MangledName) { Diags.Report(AA->getLocation(), diag::err_cyclic_alias) << 0; return; } // If there is a definition in the module, then it wins over the alias. // This is dubious, but allow it to be safe. Just ignore the alias. llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (Entry && !Entry->isDeclaration()) return; Aliases.push_back(GD); llvm::Type *DeclTy = getTypes().ConvertTypeForMem(D->getType()); // Create a reference to the named value. This ensures that it is emitted // if a deferred decl. llvm::Constant *Aliasee; llvm::GlobalValue::LinkageTypes LT; if (isa(DeclTy)) { Aliasee = GetOrCreateLLVMFunction(AA->getAliasee(), DeclTy, GD, /*ForVTable=*/false); LT = getFunctionLinkage(GD); } else { Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), llvm::PointerType::getUnqual(DeclTy), /*D=*/nullptr); LT = getLLVMLinkageVarDefinition(cast(GD.getDecl()), D->getType().isConstQualified()); } // Create the new alias itself, but don't set a name yet. 
unsigned AS = Aliasee->getType()->getPointerAddressSpace(); auto *GA = llvm::GlobalAlias::create(DeclTy, AS, LT, "", Aliasee, &getModule()); if (Entry) { if (GA->getAliasee() == Entry) { Diags.Report(AA->getLocation(), diag::err_cyclic_alias) << 0; return; } assert(Entry->isDeclaration()); // If there is a declaration in the module, then we had an extern followed // by the alias, as in: // extern int test6(); // ... // int test6() __attribute__((alias("test7"))); // // Remove it and replace uses of it with the alias. GA->takeName(Entry); Entry->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GA, Entry->getType())); Entry->eraseFromParent(); } else { GA->setName(MangledName); } // Set attributes which are particular to an alias; this is a // specialization of the attributes which may be set on a global // variable/function. if (D->hasAttr() || D->hasAttr() || D->isWeakImported()) { GA->setLinkage(llvm::Function::WeakAnyLinkage); } if (const auto *VD = dyn_cast(D)) if (VD->getTLSKind()) setTLSMode(GA, *VD); SetCommonAttributes(GD, GA); } void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) { const auto *D = cast(GD.getDecl()); const IFuncAttr *IFA = D->getAttr(); assert(IFA && "Not an ifunc?"); StringRef MangledName = getMangledName(GD); if (IFA->getResolver() == MangledName) { Diags.Report(IFA->getLocation(), diag::err_cyclic_alias) << 1; return; } // Report an error if some definition overrides ifunc. llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (Entry && !Entry->isDeclaration()) { GlobalDecl OtherGD; if (lookupRepresentativeDecl(MangledName, OtherGD) && DiagnosedConflictingDefinitions.insert(GD).second) { Diags.Report(D->getLocation(), diag::err_duplicate_mangled_name) << MangledName; Diags.Report(OtherGD.getDecl()->getLocation(), diag::note_previous_definition); } return; } Aliases.push_back(GD); llvm::Type *DeclTy = getTypes().ConvertTypeForMem(D->getType()); llvm::Constant *Resolver = GetOrCreateLLVMFunction(IFA->getResolver(), DeclTy, GD, /*ForVTable=*/false); llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule()); if (Entry) { if (GIF->getResolver() == Entry) { Diags.Report(IFA->getLocation(), diag::err_cyclic_alias) << 1; return; } assert(Entry->isDeclaration()); // If there is a declaration in the module, then we had an extern followed // by the ifunc, as in: // extern int test(); // ... // int test() __attribute__((ifunc("resolver"))); // // Remove it and replace uses of it with the ifunc. GIF->takeName(Entry); Entry->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GIF, Entry->getType())); Entry->eraseFromParent(); } else GIF->setName(MangledName); SetCommonAttributes(GD, GIF); } llvm::Function *CodeGenModule::getIntrinsic(unsigned IID, ArrayRef Tys) { return llvm::Intrinsic::getDeclaration(&getModule(), (llvm::Intrinsic::ID)IID, Tys); } static llvm::StringMapEntry & GetConstantCFStringEntry(llvm::StringMap &Map, const StringLiteral *Literal, bool TargetIsLSB, bool &IsUTF16, unsigned &StringLength) { StringRef String = Literal->getString(); unsigned NumBytes = String.size(); // Check for simple case. if (!Literal->containsNonAsciiOrNull()) { StringLength = NumBytes; return *Map.insert(std::make_pair(String, nullptr)).first; } // Otherwise, convert the UTF8 literals into a string of shorts. IsUTF16 = true; SmallVector ToBuf(NumBytes + 1); // +1 for ending nulls. 
const llvm::UTF8 *FromPtr = (const llvm::UTF8 *)String.data(); llvm::UTF16 *ToPtr = &ToBuf[0]; (void)llvm::ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, &ToPtr, ToPtr + NumBytes, llvm::strictConversion); // ConvertUTF8toUTF16 returns the length in ToPtr. StringLength = ToPtr - &ToBuf[0]; // Add an explicit null. *ToPtr = 0; return *Map.insert(std::make_pair( StringRef(reinterpret_cast(ToBuf.data()), (StringLength + 1) * 2), nullptr)).first; } ConstantAddress CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { unsigned StringLength = 0; bool isUTF16 = false; llvm::StringMapEntry &Entry = GetConstantCFStringEntry(CFConstantStringMap, Literal, getDataLayout().isLittleEndian(), isUTF16, StringLength); if (auto *C = Entry.second) return ConstantAddress(C, CharUnits::fromQuantity(C->getAlignment())); llvm::Constant *Zero = llvm::Constant::getNullValue(Int32Ty); llvm::Constant *Zeros[] = { Zero, Zero }; const ASTContext &Context = getContext(); const llvm::Triple &Triple = getTriple(); const auto CFRuntime = getLangOpts().CFRuntime; const bool IsSwiftABI = static_cast(CFRuntime) >= static_cast(LangOptions::CoreFoundationABI::Swift); const bool IsSwift4_1 = CFRuntime == LangOptions::CoreFoundationABI::Swift4_1; // If we don't already have it, get __CFConstantStringClassReference. if (!CFConstantStringClassRef) { const char *CFConstantStringClassName = "__CFConstantStringClassReference"; llvm::Type *Ty = getTypes().ConvertType(getContext().IntTy); Ty = llvm::ArrayType::get(Ty, 0); switch (CFRuntime) { default: break; case LangOptions::CoreFoundationABI::Swift: LLVM_FALLTHROUGH; case LangOptions::CoreFoundationABI::Swift5_0: CFConstantStringClassName = Triple.isOSDarwin() ? "$s15SwiftFoundation19_NSCFConstantStringCN" : "$s10Foundation19_NSCFConstantStringCN"; Ty = IntPtrTy; break; case LangOptions::CoreFoundationABI::Swift4_2: CFConstantStringClassName = Triple.isOSDarwin() ? "$S15SwiftFoundation19_NSCFConstantStringCN" : "$S10Foundation19_NSCFConstantStringCN"; Ty = IntPtrTy; break; case LangOptions::CoreFoundationABI::Swift4_1: CFConstantStringClassName = Triple.isOSDarwin() ? "__T015SwiftFoundation19_NSCFConstantStringCN" : "__T010Foundation19_NSCFConstantStringCN"; Ty = IntPtrTy; break; } llvm::Constant *C = CreateRuntimeVariable(Ty, CFConstantStringClassName); if (Triple.isOSBinFormatELF() || Triple.isOSBinFormatCOFF()) { llvm::GlobalValue *GV = nullptr; if ((GV = dyn_cast(C))) { IdentifierInfo &II = Context.Idents.get(GV->getName()); TranslationUnitDecl *TUDecl = Context.getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); const VarDecl *VD = nullptr; for (const auto &Result : DC->lookup(&II)) if ((VD = dyn_cast(Result))) break; if (Triple.isOSBinFormatELF()) { if (!VD) GV->setLinkage(llvm::GlobalValue::ExternalLinkage); } else { GV->setLinkage(llvm::GlobalValue::ExternalLinkage); if (!VD || !VD->hasAttr()) GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); else GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); } setDSOLocal(GV); } } // Decay array -> ptr CFConstantStringClassRef = IsSwiftABI ? llvm::ConstantExpr::getPtrToInt(C, Ty) : llvm::ConstantExpr::getGetElementPtr(Ty, C, Zeros); } QualType CFTy = Context.getCFConstantStringType(); auto *STy = cast(getTypes().ConvertType(CFTy)); ConstantInitBuilder Builder(*this); auto Fields = Builder.beginStruct(STy); // Class pointer. Fields.add(cast(CFConstantStringClassRef)); // Flags. if (IsSwiftABI) { Fields.addInt(IntPtrTy, IsSwift4_1 ? 
0x05 : 0x01); Fields.addInt(Int64Ty, isUTF16 ? 0x07d0 : 0x07c8); } else { Fields.addInt(IntTy, isUTF16 ? 0x07d0 : 0x07C8); } // String pointer. llvm::Constant *C = nullptr; if (isUTF16) { auto Arr = llvm::makeArrayRef( reinterpret_cast(const_cast(Entry.first().data())), Entry.first().size() / 2); C = llvm::ConstantDataArray::get(VMContext, Arr); } else { C = llvm::ConstantDataArray::getString(VMContext, Entry.first()); } // Note: -fwritable-strings doesn't make the backing store strings of // CFStrings writable. (See ) auto *GV = new llvm::GlobalVariable(getModule(), C->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, C, ".str"); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // Don't enforce the target's minimum global alignment, since the only use // of the string is via this class initializer. CharUnits Align = isUTF16 ? Context.getTypeAlignInChars(Context.ShortTy) : Context.getTypeAlignInChars(Context.CharTy); GV->setAlignment(Align.getAsAlign()); // FIXME: We set the section explicitly to avoid a bug in ld64 224.1. // Without it LLVM can merge the string with a non unnamed_addr one during // LTO. Doing that changes the section it ends in, which surprises ld64. if (Triple.isOSBinFormatMachO()) GV->setSection(isUTF16 ? "__TEXT,__ustring" : "__TEXT,__cstring,cstring_literals"); // Make sure the literal ends up in .rodata to allow for safe ICF and for // the static linker to adjust permissions to read-only later on. else if (Triple.isOSBinFormatELF()) GV->setSection(".rodata"); // String. llvm::Constant *Str = llvm::ConstantExpr::getGetElementPtr(GV->getValueType(), GV, Zeros); if (isUTF16) // Cast the UTF16 string to the correct type. Str = llvm::ConstantExpr::getBitCast(Str, Int8PtrTy); Fields.add(Str); // String length. llvm::IntegerType *LengthTy = llvm::IntegerType::get(getModule().getContext(), Context.getTargetInfo().getLongWidth()); if (IsSwiftABI) { if (CFRuntime == LangOptions::CoreFoundationABI::Swift4_1 || CFRuntime == LangOptions::CoreFoundationABI::Swift4_2) LengthTy = Int32Ty; else LengthTy = IntPtrTy; } Fields.addInt(LengthTy, StringLength); // Swift ABI requires 8-byte alignment to ensure that the _Atomic(uint64_t) is // properly aligned on 32-bit platforms. CharUnits Alignment = IsSwiftABI ? Context.toCharUnitsFromBits(64) : getPointerAlign(); // The struct. 
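// Illustrative sketch of the constant built here for the default CF runtime
// (field types vary by target): for @"hello" the fields are roughly
//
//   { __CFConstantStringClassReference,  // isa
//     0x7C8,                             // flags (0x7D0 if the data is UTF-16)
//     <pointer to the ".str" data>,      // characters
//     5 }                                // length in code units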
GV = Fields.finishAndCreateGlobal("_unnamed_cfstring_", Alignment, /*isConstant=*/false, llvm::GlobalVariable::PrivateLinkage); GV->addAttribute("objc_arc_inert"); switch (Triple.getObjectFormat()) { case llvm::Triple::UnknownObjectFormat: llvm_unreachable("unknown file format"); case llvm::Triple::GOFF: llvm_unreachable("GOFF is not yet implemented"); case llvm::Triple::XCOFF: llvm_unreachable("XCOFF is not yet implemented"); case llvm::Triple::COFF: case llvm::Triple::ELF: case llvm::Triple::Wasm: GV->setSection("cfstring"); break; case llvm::Triple::MachO: GV->setSection("__DATA,__cfstring"); break; } Entry.second = GV; return ConstantAddress(GV, Alignment); } bool CodeGenModule::getExpressionLocationsEnabled() const { return !CodeGenOpts.EmitCodeView || CodeGenOpts.DebugColumnInfo; } QualType CodeGenModule::getObjCFastEnumerationStateType() { if (ObjCFastEnumerationStateType.isNull()) { RecordDecl *D = Context.buildImplicitRecord("__objcFastEnumerationState"); D->startDefinition(); QualType FieldTypes[] = { Context.UnsignedLongTy, Context.getPointerType(Context.getObjCIdType()), Context.getPointerType(Context.UnsignedLongTy), Context.getConstantArrayType(Context.UnsignedLongTy, llvm::APInt(32, 5), nullptr, ArrayType::Normal, 0) }; for (size_t i = 0; i < 4; ++i) { FieldDecl *Field = FieldDecl::Create(Context, D, SourceLocation(), SourceLocation(), nullptr, FieldTypes[i], /*TInfo=*/nullptr, /*BitWidth=*/nullptr, /*Mutable=*/false, ICIS_NoInit); Field->setAccess(AS_public); D->addDecl(Field); } D->completeDefinition(); ObjCFastEnumerationStateType = Context.getTagDeclType(D); } return ObjCFastEnumerationStateType; } llvm::Constant * CodeGenModule::GetConstantArrayFromStringLiteral(const StringLiteral *E) { assert(!E->getType()->isPointerType() && "Strings are always arrays"); // Don't emit it as the address of the string, emit the string data itself // as an inline array. if (E->getCharByteWidth() == 1) { SmallString<64> Str(E->getString()); // Resize the string to the right size, which is indicated by its type. const ConstantArrayType *CAT = Context.getAsConstantArrayType(E->getType()); Str.resize(CAT->getSize().getZExtValue()); return llvm::ConstantDataArray::getString(VMContext, Str, false); } auto *AType = cast(getTypes().ConvertType(E->getType())); llvm::Type *ElemTy = AType->getElementType(); unsigned NumElements = AType->getNumElements(); // Wide strings have either 2-byte or 4-byte elements. 
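// For example (illustrative, assuming a typical Linux target where wchar_t is
// 4 bytes): u"ab" has 2-byte elements and lowers to a [3 x i16] constant,
// while U"ab" and L"ab" lower to [3 x i32] (the extra element is the NUL).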
if (ElemTy->getPrimitiveSizeInBits() == 16) { SmallVector Elements; Elements.reserve(NumElements); for(unsigned i = 0, e = E->getLength(); i != e; ++i) Elements.push_back(E->getCodeUnit(i)); Elements.resize(NumElements); return llvm::ConstantDataArray::get(VMContext, Elements); } assert(ElemTy->getPrimitiveSizeInBits() == 32); SmallVector Elements; Elements.reserve(NumElements); for(unsigned i = 0, e = E->getLength(); i != e; ++i) Elements.push_back(E->getCodeUnit(i)); Elements.resize(NumElements); return llvm::ConstantDataArray::get(VMContext, Elements); } static llvm::GlobalVariable * GenerateStringLiteral(llvm::Constant *C, llvm::GlobalValue::LinkageTypes LT, CodeGenModule &CGM, StringRef GlobalName, CharUnits Alignment) { unsigned AddrSpace = CGM.getContext().getTargetAddressSpace( CGM.getStringLiteralAddressSpace()); llvm::Module &M = CGM.getModule(); // Create a global variable for this string auto *GV = new llvm::GlobalVariable( M, C->getType(), !CGM.getLangOpts().WritableStrings, LT, C, GlobalName, nullptr, llvm::GlobalVariable::NotThreadLocal, AddrSpace); GV->setAlignment(Alignment.getAsAlign()); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); if (GV->isWeakForLinker()) { assert(CGM.supportsCOMDAT() && "Only COFF uses weak string literals"); GV->setComdat(M.getOrInsertComdat(GV->getName())); } CGM.setDSOLocal(GV); return GV; } /// GetAddrOfConstantStringFromLiteral - Return a pointer to a /// constant array for the given string literal. ConstantAddress CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S, StringRef Name) { CharUnits Alignment = getContext().getAlignOfGlobalVarInChars(S->getType()); llvm::Constant *C = GetConstantArrayFromStringLiteral(S); llvm::GlobalVariable **Entry = nullptr; if (!LangOpts.WritableStrings) { Entry = &ConstantStringMap[C]; if (auto GV = *Entry) { if (Alignment.getQuantity() > GV->getAlignment()) GV->setAlignment(Alignment.getAsAlign()); return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), Alignment); } } SmallString<256> MangledNameBuffer; StringRef GlobalVariableName; llvm::GlobalValue::LinkageTypes LT; // Mangle the string literal if that's how the ABI merges duplicate strings. // Don't do it if they are writable, since we don't want writes in one TU to // affect strings in another. if (getCXXABI().getMangleContext().shouldMangleStringLiteral(S) && !LangOpts.WritableStrings) { llvm::raw_svector_ostream Out(MangledNameBuffer); getCXXABI().getMangleContext().mangleStringLiteral(S, Out); LT = llvm::GlobalValue::LinkOnceODRLinkage; GlobalVariableName = MangledNameBuffer; } else { LT = llvm::GlobalValue::PrivateLinkage; GlobalVariableName = Name; } auto GV = GenerateStringLiteral(C, LT, *this, GlobalVariableName, Alignment); if (Entry) *Entry = GV; SanitizerMD->reportGlobalToASan(GV, S->getStrTokenLoc(0), "", QualType()); return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), Alignment); } /// GetAddrOfConstantStringFromObjCEncode - Return a pointer to a constant /// array for the given ObjCEncodeExpr node. ConstantAddress CodeGenModule::GetAddrOfConstantStringFromObjCEncode(const ObjCEncodeExpr *E) { std::string Str; getContext().getObjCEncodingForType(E->getEncodedType(), Str); return GetAddrOfConstantCString(Str); } /// GetAddrOfConstantCString - Returns a pointer to a character array containing /// the literal and a terminating '\0' character. /// The result has pointer to array type. 
ConstantAddress CodeGenModule::GetAddrOfConstantCString( const std::string &Str, const char *GlobalName) { StringRef StrWithNull(Str.c_str(), Str.size() + 1); CharUnits Alignment = getContext().getAlignOfGlobalVarInChars(getContext().CharTy); llvm::Constant *C = llvm::ConstantDataArray::getString(getLLVMContext(), StrWithNull, false); // Don't share any string literals if strings aren't constant. llvm::GlobalVariable **Entry = nullptr; if (!LangOpts.WritableStrings) { Entry = &ConstantStringMap[C]; if (auto GV = *Entry) { if (Alignment.getQuantity() > GV->getAlignment()) GV->setAlignment(Alignment.getAsAlign()); return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), Alignment); } } // Get the default prefix if a name wasn't specified. if (!GlobalName) GlobalName = ".str"; // Create a global variable for this. auto GV = GenerateStringLiteral(C, llvm::GlobalValue::PrivateLinkage, *this, GlobalName, Alignment); if (Entry) *Entry = GV; return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), Alignment); } ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( const MaterializeTemporaryExpr *E, const Expr *Init) { assert((E->getStorageDuration() == SD_Static || E->getStorageDuration() == SD_Thread) && "not a global temporary"); const auto *VD = cast(E->getExtendingDecl()); // If we're not materializing a subobject of the temporary, keep the // cv-qualifiers from the type of the MaterializeTemporaryExpr. QualType MaterializedType = Init->getType(); if (Init == E->getSubExpr()) MaterializedType = E->getType(); CharUnits Align = getContext().getTypeAlignInChars(MaterializedType); if (llvm::Constant *Slot = MaterializedGlobalTemporaryMap[E]) return ConstantAddress(Slot, Align); // FIXME: If an externally-visible declaration extends multiple temporaries, // we need to give each temporary the same name in every translation unit (and // we also need to make the temporaries externally-visible). SmallString<256> Name; llvm::raw_svector_ostream Out(Name); getCXXABI().getMangleContext().mangleReferenceTemporary( VD, E->getManglingNumber(), Out); APValue *Value = nullptr; if (E->getStorageDuration() == SD_Static && VD && VD->evaluateValue()) { // If the initializer of the extending declaration is a constant // initializer, we should have a cached constant initializer for this // temporary. Note that this might have a different value from the value // computed by evaluating the initializer if the surrounding constant // expression modifies the temporary. Value = E->getOrCreateValue(false); } // Try evaluating it now, it might have a constant initializer. Expr::EvalResult EvalResult; if (!Value && Init->EvaluateAsRValue(EvalResult, getContext()) && !EvalResult.hasSideEffects()) Value = &EvalResult.Val; LangAS AddrSpace = VD ? GetGlobalVarAddressSpace(VD) : MaterializedType.getAddressSpace(); Optional emitter; llvm::Constant *InitialValue = nullptr; bool Constant = false; llvm::Type *Type; if (Value) { // The temporary has a constant initializer, use it. emitter.emplace(*this); InitialValue = emitter->emitForInitializer(*Value, AddrSpace, MaterializedType); Constant = isTypeConstant(MaterializedType, /*ExcludeCtor*/Value); Type = InitialValue->getType(); } else { // No initializer, the initialization will be provided when we // initialize the declaration which performed lifetime extension. Type = getTypes().ConvertTypeForMem(MaterializedType); } // Create a global variable for this lifetime-extended temporary. 
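// Illustrative C++ example of what reaches this point:
//
//   const int &Ref = 5;   // at namespace scope
//
// The temporary holding '5' has static storage duration; it becomes its own
// global here, named via mangleReferenceTemporary, and 'Ref' is then bound to it.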
llvm::GlobalValue::LinkageTypes Linkage = getLLVMLinkageVarDefinition(VD, Constant); if (Linkage == llvm::GlobalVariable::ExternalLinkage) { const VarDecl *InitVD; if (VD->isStaticDataMember() && VD->getAnyInitializer(InitVD) && isa(InitVD->getLexicalDeclContext())) { // Temporaries defined inside a class get linkonce_odr linkage because the // class can be defined in multiple translation units. Linkage = llvm::GlobalVariable::LinkOnceODRLinkage; } else { // There is no need for this temporary to have external linkage if the // VarDecl has external linkage. Linkage = llvm::GlobalVariable::InternalLinkage; } } auto TargetAS = getContext().getTargetAddressSpace(AddrSpace); auto *GV = new llvm::GlobalVariable( getModule(), Type, Constant, Linkage, InitialValue, Name.c_str(), /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS); if (emitter) emitter->finalize(GV); setGVProperties(GV, VD); GV->setAlignment(Align.getAsAlign()); if (supportsCOMDAT() && GV->isWeakForLinker()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); if (VD->getTLSKind()) setTLSMode(GV, *VD); llvm::Constant *CV = GV; if (AddrSpace != LangAS::Default) CV = getTargetCodeGenInfo().performAddrSpaceCast( *this, GV, AddrSpace, LangAS::Default, Type->getPointerTo( getContext().getTargetAddressSpace(LangAS::Default))); MaterializedGlobalTemporaryMap[E] = CV; return ConstantAddress(CV, Align); } /// EmitObjCPropertyImplementations - Emit information for synthesized /// properties for an implementation. void CodeGenModule::EmitObjCPropertyImplementations(const ObjCImplementationDecl *D) { for (const auto *PID : D->property_impls()) { // Dynamic is just for type-checking. if (PID->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize) { ObjCPropertyDecl *PD = PID->getPropertyDecl(); // Determine which methods need to be implemented, some may have // been overridden. Note that ::isPropertyAccessor is not the method // we want, that just indicates if the decl came from a // property. What we want to know is if the method is defined in // this implementation. auto *Getter = PID->getGetterMethodDecl(); if (!Getter || Getter->isSynthesizedAccessorStub()) CodeGenFunction(*this).GenerateObjCGetter( const_cast(D), PID); auto *Setter = PID->getSetterMethodDecl(); if (!PD->isReadOnly() && (!Setter || Setter->isSynthesizedAccessorStub())) CodeGenFunction(*this).GenerateObjCSetter( const_cast(D), PID); } } } static bool needsDestructMethod(ObjCImplementationDecl *impl) { const ObjCInterfaceDecl *iface = impl->getClassInterface(); for (const ObjCIvarDecl *ivar = iface->all_declared_ivar_begin(); ivar; ivar = ivar->getNextIvar()) if (ivar->getType().isDestructedType()) return true; return false; } static bool AllTrivialInitializers(CodeGenModule &CGM, ObjCImplementationDecl *D) { CodeGenFunction CGF(CGM); for (ObjCImplementationDecl::init_iterator B = D->init_begin(), E = D->init_end(); B != E; ++B) { CXXCtorInitializer *CtorInitExp = *B; Expr *Init = CtorInitExp->getInit(); if (!CGF.isTrivialInitializer(Init)) return false; } return true; } /// EmitObjCIvarInitializations - Emit information for ivar initialization /// for an implementation. void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) { // We might need a .cxx_destruct even if we don't have any ivar initializers. 
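// Illustrative Objective-C++ sketch (class name is hypothetical): an ivar whose
// type has a non-trivial destructor is enough to require the synthesized method:
//
//   @implementation Widget   // Widget declares '{ std::string name; }'
//   @end                     // gets an implicit .cxx_destruct that destroys 'name'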
if (needsDestructMethod(D)) { IdentifierInfo *II = &getContext().Idents.get(".cxx_destruct"); Selector cxxSelector = getContext().Selectors.getSelector(0, &II); ObjCMethodDecl *DTORMethod = ObjCMethodDecl::Create( getContext(), D->getLocation(), D->getLocation(), cxxSelector, getContext().VoidTy, nullptr, D, /*isInstance=*/true, /*isVariadic=*/false, /*isPropertyAccessor=*/true, /*isSynthesizedAccessorStub=*/false, /*isImplicitlyDeclared=*/true, /*isDefined=*/false, ObjCMethodDecl::Required); D->addInstanceMethod(DTORMethod); CodeGenFunction(*this).GenerateObjCCtorDtorMethod(D, DTORMethod, false); D->setHasDestructors(true); } // If the implementation doesn't have any ivar initializers, we don't need // a .cxx_construct. if (D->getNumIvarInitializers() == 0 || AllTrivialInitializers(*this, D)) return; IdentifierInfo *II = &getContext().Idents.get(".cxx_construct"); Selector cxxSelector = getContext().Selectors.getSelector(0, &II); // The constructor returns 'self'. ObjCMethodDecl *CTORMethod = ObjCMethodDecl::Create( getContext(), D->getLocation(), D->getLocation(), cxxSelector, getContext().getObjCIdType(), nullptr, D, /*isInstance=*/true, /*isVariadic=*/false, /*isPropertyAccessor=*/true, /*isSynthesizedAccessorStub=*/false, /*isImplicitlyDeclared=*/true, /*isDefined=*/false, ObjCMethodDecl::Required); D->addInstanceMethod(CTORMethod); CodeGenFunction(*this).GenerateObjCCtorDtorMethod(D, CTORMethod, true); D->setHasNonZeroConstructors(true); } // EmitLinkageSpec - Emit all declarations in a linkage spec. void CodeGenModule::EmitLinkageSpec(const LinkageSpecDecl *LSD) { if (LSD->getLanguage() != LinkageSpecDecl::lang_c && LSD->getLanguage() != LinkageSpecDecl::lang_cxx) { ErrorUnsupported(LSD, "linkage spec"); return; } EmitDeclContext(LSD); } void CodeGenModule::EmitDeclContext(const DeclContext *DC) { for (auto *I : DC->decls()) { // Unlike other DeclContexts, the contents of an ObjCImplDecl at TU scope // are themselves considered "top-level", so EmitTopLevelDecl on an // ObjCImplDecl does not recursively visit them. We need to do that in // case they're nested inside another construct (LinkageSpecDecl / // ExportDecl) that does stop them from being considered "top-level". if (auto *OID = dyn_cast(I)) { for (auto *M : OID->methods()) EmitTopLevelDecl(M); } EmitTopLevelDecl(I); } } /// EmitTopLevelDecl - Emit code for a single top level declaration. void CodeGenModule::EmitTopLevelDecl(Decl *D) { // Ignore dependent declarations. if (D->isTemplated()) return; // Consteval function shouldn't be emitted. if (auto *FD = dyn_cast(D)) if (FD->isConsteval()) return; switch (D->getKind()) { case Decl::CXXConversion: case Decl::CXXMethod: case Decl::Function: EmitGlobal(cast(D)); // Always provide some coverage mapping // even for the functions that aren't emitted. AddDeferredUnusedCoverageMapping(D); break; case Decl::CXXDeductionGuide: // Function-like, but does not result in code emission. break; case Decl::Var: case Decl::Decomposition: case Decl::VarTemplateSpecialization: EmitGlobal(cast(D)); if (auto *DD = dyn_cast(D)) for (auto *B : DD->bindings()) if (auto *HD = B->getHoldingVar()) EmitGlobal(HD); break; // Indirect fields from global anonymous structs and unions can be // ignored; only the actual variable requires IR gen support. 
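// For example (illustrative):
//
//   static union { int i; float f; };   // anonymous union at namespace scope
//
// 'i' and 'f' are IndirectFieldDecls naming pieces of the single unnamed
// variable; only that variable itself needs IR.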
case Decl::IndirectField: break; // C++ Decls case Decl::Namespace: EmitDeclContext(cast(D)); break; case Decl::ClassTemplateSpecialization: { const auto *Spec = cast(D); if (CGDebugInfo *DI = getModuleDebugInfo()) if (Spec->getSpecializationKind() == TSK_ExplicitInstantiationDefinition && Spec->hasDefinition()) DI->completeTemplateDefinition(*Spec); } LLVM_FALLTHROUGH; case Decl::CXXRecord: { CXXRecordDecl *CRD = cast(D); if (CGDebugInfo *DI = getModuleDebugInfo()) { if (CRD->hasDefinition()) DI->EmitAndRetainType(getContext().getRecordType(cast(D))); if (auto *ES = D->getASTContext().getExternalSource()) if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never) DI->completeUnusedClass(*CRD); } // Emit any static data members, they may be definitions. for (auto *I : CRD->decls()) if (isa(I) || isa(I)) EmitTopLevelDecl(I); break; } // No code generation needed. case Decl::UsingShadow: case Decl::ClassTemplate: case Decl::VarTemplate: case Decl::Concept: case Decl::VarTemplatePartialSpecialization: case Decl::FunctionTemplate: case Decl::TypeAliasTemplate: case Decl::Block: case Decl::Empty: case Decl::Binding: break; case Decl::Using: // using X; [C++] if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitUsingDecl(cast(*D)); break; case Decl::NamespaceAlias: if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitNamespaceAlias(cast(*D)); break; case Decl::UsingDirective: // using namespace X; [C++] if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitUsingDirective(cast(*D)); break; case Decl::CXXConstructor: getCXXABI().EmitCXXConstructors(cast(D)); break; case Decl::CXXDestructor: getCXXABI().EmitCXXDestructors(cast(D)); break; case Decl::StaticAssert: // Nothing to do. break; // Objective-C Decls // Forward declarations, no (immediate) code generation. case Decl::ObjCInterface: case Decl::ObjCCategory: break; case Decl::ObjCProtocol: { auto *Proto = cast(D); if (Proto->isThisDeclarationADefinition()) ObjCRuntime->GenerateProtocol(Proto); break; } case Decl::ObjCCategoryImpl: // Categories have properties but don't support synthesize so we // can ignore them here. ObjCRuntime->GenerateCategory(cast(D)); break; case Decl::ObjCImplementation: { auto *OMD = cast(D); EmitObjCPropertyImplementations(OMD); EmitObjCIvarInitializations(OMD); ObjCRuntime->GenerateClass(OMD); // Emit global variable debug information. if (CGDebugInfo *DI = getModuleDebugInfo()) if (getCodeGenOpts().hasReducedDebugInfo()) DI->getOrCreateInterfaceType(getContext().getObjCInterfaceType( OMD->getClassInterface()), OMD->getLocation()); break; } case Decl::ObjCMethod: { auto *OMD = cast(D); // If this is not a prototype, emit the body. if (OMD->getBody()) CodeGenFunction(*this).GenerateObjCMethod(OMD); break; } case Decl::ObjCCompatibleAlias: ObjCRuntime->RegisterAlias(cast(D)); break; case Decl::PragmaComment: { const auto *PCD = cast(D); switch (PCD->getCommentKind()) { case PCK_Unknown: llvm_unreachable("unexpected pragma comment kind"); case PCK_Linker: AppendLinkerOptions(PCD->getArg()); break; case PCK_Lib: AddDependentLib(PCD->getArg()); break; case PCK_Compiler: case PCK_ExeStr: case PCK_User: break; // We ignore all of these. } break; } case Decl::PragmaDetectMismatch: { const auto *PDMD = cast(D); AddDetectMismatch(PDMD->getName(), PDMD->getValue()); break; } case Decl::LinkageSpec: EmitLinkageSpec(cast(D)); break; case Decl::FileScopeAsm: { // File-scope asm is ignored during device-side CUDA compilation. 
if (LangOpts.CUDA && LangOpts.CUDAIsDevice) break; // File-scope asm is ignored during device-side OpenMP compilation. if (LangOpts.OpenMPIsDevice) break; auto *AD = cast(D); getModule().appendModuleInlineAsm(AD->getAsmString()->getString()); break; } case Decl::Import: { auto *Import = cast(D); // If we've already imported this module, we're done. if (!ImportedModules.insert(Import->getImportedModule())) break; // Emit debug information for direct imports. if (!Import->getImportedOwningModule()) { if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitImportDecl(*Import); } // Find all of the submodules and emit the module initializers. llvm::SmallPtrSet Visited; SmallVector Stack; Visited.insert(Import->getImportedModule()); Stack.push_back(Import->getImportedModule()); while (!Stack.empty()) { clang::Module *Mod = Stack.pop_back_val(); if (!EmittedModuleInitializers.insert(Mod).second) continue; for (auto *D : Context.getModuleInitializers(Mod)) EmitTopLevelDecl(D); // Visit the submodules of this module. for (clang::Module::submodule_iterator Sub = Mod->submodule_begin(), SubEnd = Mod->submodule_end(); Sub != SubEnd; ++Sub) { // Skip explicit children; they need to be explicitly imported to emit // the initializers. if ((*Sub)->IsExplicit) continue; if (Visited.insert(*Sub).second) Stack.push_back(*Sub); } } break; } case Decl::Export: EmitDeclContext(cast(D)); break; case Decl::OMPThreadPrivate: EmitOMPThreadPrivateDecl(cast(D)); break; case Decl::OMPAllocate: break; case Decl::OMPDeclareReduction: EmitOMPDeclareReduction(cast(D)); break; case Decl::OMPDeclareMapper: EmitOMPDeclareMapper(cast(D)); break; case Decl::OMPRequires: EmitOMPRequiresDecl(cast(D)); break; case Decl::Typedef: case Decl::TypeAlias: // using foo = bar; [C++11] if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitAndRetainType( getContext().getTypedefType(cast(D))); break; case Decl::Record: if (CGDebugInfo *DI = getModuleDebugInfo()) if (cast(D)->getDefinition()) DI->EmitAndRetainType(getContext().getRecordType(cast(D))); break; case Decl::Enum: if (CGDebugInfo *DI = getModuleDebugInfo()) if (cast(D)->getDefinition()) DI->EmitAndRetainType(getContext().getEnumType(cast(D))); break; default: // Make sure we handled everything we should, every other kind is a // non-top-level decl. FIXME: Would be nice to have an isTopLevelDeclKind // function. Need to recode Decl::Kind to do that easily. assert(isa(D) && "Unsupported decl kind"); break; } } void CodeGenModule::AddDeferredUnusedCoverageMapping(Decl *D) { // Do we need to generate coverage mapping? if (!CodeGenOpts.CoverageMapping) return; switch (D->getKind()) { case Decl::CXXConversion: case Decl::CXXMethod: case Decl::Function: case Decl::ObjCMethod: case Decl::CXXConstructor: case Decl::CXXDestructor: { if (!cast(D)->doesThisDeclarationHaveABody()) break; SourceManager &SM = getContext().getSourceManager(); if (LimitedCoverage && SM.getMainFileID() != SM.getFileID(D->getBeginLoc())) break; auto I = DeferredEmptyCoverageMappingDecls.find(D); if (I == DeferredEmptyCoverageMappingDecls.end()) DeferredEmptyCoverageMappingDecls[D] = true; break; } default: break; }; } void CodeGenModule::ClearUnusedCoverageMapping(const Decl *D) { // Do we need to generate coverage mapping? 
if (!CodeGenOpts.CoverageMapping) return; if (const auto *Fn = dyn_cast(D)) { if (Fn->isTemplateInstantiation()) ClearUnusedCoverageMapping(Fn->getTemplateInstantiationPattern()); } auto I = DeferredEmptyCoverageMappingDecls.find(D); if (I == DeferredEmptyCoverageMappingDecls.end()) DeferredEmptyCoverageMappingDecls[D] = false; else I->second = false; } void CodeGenModule::EmitDeferredUnusedCoverageMappings() { // We call takeVector() here to avoid use-after-free. // FIXME: DeferredEmptyCoverageMappingDecls is getting mutated because // we deserialize function bodies to emit coverage info for them, and that // deserializes more declarations. How should we handle that case? for (const auto &Entry : DeferredEmptyCoverageMappingDecls.takeVector()) { if (!Entry.second) continue; const Decl *D = Entry.first; switch (D->getKind()) { case Decl::CXXConversion: case Decl::CXXMethod: case Decl::Function: case Decl::ObjCMethod: { CodeGenPGO PGO(*this); GlobalDecl GD(cast(D)); PGO.emitEmptyCounterMapping(D, getMangledName(GD), getFunctionLinkage(GD)); break; } case Decl::CXXConstructor: { CodeGenPGO PGO(*this); GlobalDecl GD(cast(D), Ctor_Base); PGO.emitEmptyCounterMapping(D, getMangledName(GD), getFunctionLinkage(GD)); break; } case Decl::CXXDestructor: { CodeGenPGO PGO(*this); GlobalDecl GD(cast(D), Dtor_Base); PGO.emitEmptyCounterMapping(D, getMangledName(GD), getFunctionLinkage(GD)); break; } default: break; }; } } void CodeGenModule::EmitMainVoidAlias() { // In order to transition away from "__original_main" gracefully, emit an // alias for "main" in the no-argument case so that libc can detect when // new-style no-argument main is in used. if (llvm::Function *F = getModule().getFunction("main")) { if (!F->isDeclaration() && F->arg_size() == 0 && !F->isVarArg() && F->getReturnType()->isIntegerTy(Context.getTargetInfo().getIntWidth())) addUsedGlobal(llvm::GlobalAlias::create("__main_void", F)); } } /// Turns the given pointer into a constant. static llvm::Constant *GetPointerConstant(llvm::LLVMContext &Context, const void *Ptr) { uintptr_t PtrInt = reinterpret_cast(Ptr); llvm::Type *i64 = llvm::Type::getInt64Ty(Context); return llvm::ConstantInt::get(i64, PtrInt); } static void EmitGlobalDeclMetadata(CodeGenModule &CGM, llvm::NamedMDNode *&GlobalMetadata, GlobalDecl D, llvm::GlobalValue *Addr) { if (!GlobalMetadata) GlobalMetadata = CGM.getModule().getOrInsertNamedMetadata("clang.global.decl.ptrs"); // TODO: should we report variant information for ctors/dtors? llvm::Metadata *Ops[] = {llvm::ConstantAsMetadata::get(Addr), llvm::ConstantAsMetadata::get(GetPointerConstant( CGM.getLLVMContext(), D.getDecl()))}; GlobalMetadata->addOperand(llvm::MDNode::get(CGM.getLLVMContext(), Ops)); } /// For each function which is declared within an extern "C" region and marked /// as 'used', but has internal linkage, create an alias from the unmangled /// name to the mangled name if possible. People expect to be able to refer /// to such functions with an unmangled name from inline assembly within the /// same translation unit. 
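/// Illustrative example of the case handled here (names are hypothetical):
///
///   extern "C" {
///     __attribute__((used)) static int secret(void) { return 42; }
///   }
///   // inline asm in this TU may refer to the plain name, e.g. "call secret"
///
/// Because 'secret' has internal linkage its IR name is typically the mangled
/// one, so an alias named 'secret' is added pointing at it when possible.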
void CodeGenModule::EmitStaticExternCAliases() { if (!getTargetCodeGenInfo().shouldEmitStaticExternCAliases()) return; for (auto &I : StaticExternCValues) { IdentifierInfo *Name = I.first; llvm::GlobalValue *Val = I.second; if (Val && !getModule().getNamedValue(Name->getName())) addUsedGlobal(llvm::GlobalAlias::create(Name->getName(), Val)); } } bool CodeGenModule::lookupRepresentativeDecl(StringRef MangledName, GlobalDecl &Result) const { auto Res = Manglings.find(MangledName); if (Res == Manglings.end()) return false; Result = Res->getValue(); return true; } /// Emits metadata nodes associating all the global values in the /// current module with the Decls they came from. This is useful for /// projects using IR gen as a subroutine. /// /// Since there's currently no way to associate an MDNode directly /// with an llvm::GlobalValue, we create a global named metadata /// with the name 'clang.global.decl.ptrs'. void CodeGenModule::EmitDeclMetadata() { llvm::NamedMDNode *GlobalMetadata = nullptr; for (auto &I : MangledDeclNames) { llvm::GlobalValue *Addr = getModule().getNamedValue(I.second); // Some mangled names don't necessarily have an associated GlobalValue // in this module, e.g. if we mangled it for DebugInfo. if (Addr) EmitGlobalDeclMetadata(*this, GlobalMetadata, I.first, Addr); } } /// Emits metadata nodes for all the local variables in the current /// function. void CodeGenFunction::EmitDeclMetadata() { if (LocalDeclMap.empty()) return; llvm::LLVMContext &Context = getLLVMContext(); // Find the unique metadata ID for this name. unsigned DeclPtrKind = Context.getMDKindID("clang.decl.ptr"); llvm::NamedMDNode *GlobalMetadata = nullptr; for (auto &I : LocalDeclMap) { const Decl *D = I.first; llvm::Value *Addr = I.second.getPointer(); if (auto *Alloca = dyn_cast(Addr)) { llvm::Value *DAddr = GetPointerConstant(getLLVMContext(), D); Alloca->setMetadata( DeclPtrKind, llvm::MDNode::get( Context, llvm::ValueAsMetadata::getConstant(DAddr))); } else if (auto *GV = dyn_cast(Addr)) { GlobalDecl GD = GlobalDecl(cast(D)); EmitGlobalDeclMetadata(CGM, GlobalMetadata, GD, GV); } } } void CodeGenModule::EmitVersionIdentMetadata() { llvm::NamedMDNode *IdentMetadata = TheModule.getOrInsertNamedMetadata("llvm.ident"); std::string Version = getClangFullVersion(); llvm::LLVMContext &Ctx = TheModule.getContext(); llvm::Metadata *IdentNode[] = {llvm::MDString::get(Ctx, Version)}; IdentMetadata->addOperand(llvm::MDNode::get(Ctx, IdentNode)); } void CodeGenModule::EmitCommandLineMetadata() { llvm::NamedMDNode *CommandLineMetadata = TheModule.getOrInsertNamedMetadata("llvm.commandline"); std::string CommandLine = getCodeGenOpts().RecordCommandLine; llvm::LLVMContext &Ctx = TheModule.getContext(); llvm::Metadata *CommandLineNode[] = {llvm::MDString::get(Ctx, CommandLine)}; CommandLineMetadata->addOperand(llvm::MDNode::get(Ctx, CommandLineNode)); } void CodeGenModule::EmitCoverageFile() { if (getCodeGenOpts().CoverageDataFile.empty() && getCodeGenOpts().CoverageNotesFile.empty()) return; llvm::NamedMDNode *CUNode = TheModule.getNamedMetadata("llvm.dbg.cu"); if (!CUNode) return; llvm::NamedMDNode *GCov = TheModule.getOrInsertNamedMetadata("llvm.gcov"); llvm::LLVMContext &Ctx = TheModule.getContext(); auto *CoverageDataFile = llvm::MDString::get(Ctx, getCodeGenOpts().CoverageDataFile); auto *CoverageNotesFile = llvm::MDString::get(Ctx, getCodeGenOpts().CoverageNotesFile); for (int i = 0, e = CUNode->getNumOperands(); i != e; ++i) { llvm::MDNode *CU = CUNode->getOperand(i); llvm::Metadata *Elts[] = 
{CoverageNotesFile, CoverageDataFile, CU}; GCov->addOperand(llvm::MDNode::get(Ctx, Elts)); } } llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty, bool ForEH) { // Return a bogus pointer if RTTI is disabled, unless it's for EH. // FIXME: should we even be calling this method if RTTI is disabled // and it's not for EH? if ((!ForEH && !getLangOpts().RTTI) || getLangOpts().CUDAIsDevice || (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice && getTriple().isNVPTX())) return llvm::Constant::getNullValue(Int8PtrTy); if (ForEH && Ty->isObjCObjectPointerType() && LangOpts.ObjCRuntime.isGNUFamily()) return ObjCRuntime->GetEHType(Ty); return getCXXABI().getAddrOfRTTIDescriptor(Ty); } void CodeGenModule::EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D) { // Do not emit threadprivates in simd-only mode. if (LangOpts.OpenMP && LangOpts.OpenMPSimd) return; for (auto RefExpr : D->varlists()) { auto *VD = cast(cast(RefExpr)->getDecl()); bool PerformInit = VD->getAnyInitializer() && !VD->getAnyInitializer()->isConstantInitializer(getContext(), /*ForRef=*/false); Address Addr(GetAddrOfGlobalVar(VD), getContext().getDeclAlign(VD)); if (auto InitFunction = getOpenMPRuntime().emitThreadPrivateVarDefinition( VD, Addr, RefExpr->getBeginLoc(), PerformInit)) CXXGlobalInits.push_back(InitFunction); } } llvm::Metadata * CodeGenModule::CreateMetadataIdentifierImpl(QualType T, MetadataTypeMap &Map, StringRef Suffix) { llvm::Metadata *&InternalId = Map[T.getCanonicalType()]; if (InternalId) return InternalId; if (isExternallyVisible(T->getLinkage())) { std::string OutName; llvm::raw_string_ostream Out(OutName); getCXXABI().getMangleContext().mangleTypeName(T, Out); Out << Suffix; InternalId = llvm::MDString::get(getLLVMContext(), Out.str()); } else { InternalId = llvm::MDNode::getDistinct(getLLVMContext(), llvm::ArrayRef()); } return InternalId; } llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForType(QualType T) { return CreateMetadataIdentifierImpl(T, MetadataIdMap, ""); } llvm::Metadata * CodeGenModule::CreateMetadataIdentifierForVirtualMemPtrType(QualType T) { return CreateMetadataIdentifierImpl(T, VirtualMetadataIdMap, ".virtual"); } // Generalize pointer types to a void pointer with the qualifiers of the // originally pointed-to type, e.g. 'const char *' and 'char * const *' // generalize to 'const void *' while 'char *' and 'const char **' generalize to // 'void *'. static QualType GeneralizeType(ASTContext &Ctx, QualType Ty) { if (!Ty->isPointerType()) return Ty; return Ctx.getPointerType( QualType(Ctx.VoidTy).withCVRQualifiers( Ty->getPointeeType().getCVRQualifiers())); } // Apply type generalization to a FunctionType's return and argument types static QualType GeneralizeFunctionType(ASTContext &Ctx, QualType Ty) { if (auto *FnType = Ty->getAs()) { SmallVector GeneralizedParams; for (auto &Param : FnType->param_types()) GeneralizedParams.push_back(GeneralizeType(Ctx, Param)); return Ctx.getFunctionType( GeneralizeType(Ctx, FnType->getReturnType()), GeneralizedParams, FnType->getExtProtoInfo()); } if (auto *FnType = Ty->getAs()) return Ctx.getFunctionNoProtoType( GeneralizeType(Ctx, FnType->getReturnType())); llvm_unreachable("Encountered unknown FunctionType"); } llvm::Metadata *CodeGenModule::CreateMetadataIdentifierGeneralized(QualType T) { return CreateMetadataIdentifierImpl(GeneralizeFunctionType(getContext(), T), GeneralizedMetadataIdMap, ".generalized"); } /// Returns whether this module needs the "all-vtables" type identifier. 
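/// Rough sketch of the effect (illustrative; exact offsets and strings vary by
/// target and options): with -fsanitize=cfi-vcall a vtable carries !type
/// metadata such as
///
///   @_ZTV7Derived = ... , !type !0, !type !1
///   !0 = !{i64 16, !"_ZTS4Base"}
///   !1 = !{i64 16, !"all-vtables"}   ; only when some CFI check is non-trapping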
bool CodeGenModule::NeedAllVtablesTypeId() const { // Returns true if at least one of vtable-based CFI checkers is enabled and // is not in the trapping mode. return ((LangOpts.Sanitize.has(SanitizerKind::CFIVCall) && !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFIVCall)) || (LangOpts.Sanitize.has(SanitizerKind::CFINVCall) && !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFINVCall)) || (LangOpts.Sanitize.has(SanitizerKind::CFIDerivedCast) && !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFIDerivedCast)) || (LangOpts.Sanitize.has(SanitizerKind::CFIUnrelatedCast) && !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFIUnrelatedCast))); } void CodeGenModule::AddVTableTypeMetadata(llvm::GlobalVariable *VTable, CharUnits Offset, const CXXRecordDecl *RD) { llvm::Metadata *MD = CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); VTable->addTypeMetadata(Offset.getQuantity(), MD); if (CodeGenOpts.SanitizeCfiCrossDso) if (auto CrossDsoTypeId = CreateCrossDsoCfiTypeId(MD)) VTable->addTypeMetadata(Offset.getQuantity(), llvm::ConstantAsMetadata::get(CrossDsoTypeId)); if (NeedAllVtablesTypeId()) { llvm::Metadata *MD = llvm::MDString::get(getLLVMContext(), "all-vtables"); VTable->addTypeMetadata(Offset.getQuantity(), MD); } } llvm::SanitizerStatReport &CodeGenModule::getSanStats() { if (!SanStats) SanStats = std::make_unique(&getModule()); return *SanStats; } llvm::Value * CodeGenModule::createOpenCLIntToSamplerConversion(const Expr *E, CodeGenFunction &CGF) { llvm::Constant *C = ConstantEmitter(CGF).emitAbstract(E, E->getType()); auto SamplerT = getOpenCLRuntime().getSamplerType(E->getType().getTypePtr()); auto FTy = llvm::FunctionType::get(SamplerT, {C->getType()}, false); return CGF.Builder.CreateCall(CreateRuntimeFunction(FTy, "__translate_sampler_initializer"), {C}); } CharUnits CodeGenModule::getNaturalPointeeTypeAlignment( QualType T, LValueBaseInfo *BaseInfo, TBAAAccessInfo *TBAAInfo) { return getNaturalTypeAlignment(T->getPointeeType(), BaseInfo, TBAAInfo, /* forPointeeType= */ true); } CharUnits CodeGenModule::getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo, TBAAAccessInfo *TBAAInfo, bool forPointeeType) { if (TBAAInfo) *TBAAInfo = getTBAAAccessInfo(T); // FIXME: This duplicates logic in ASTContext::getTypeAlignIfKnown. But // that doesn't return the information we need to compute BaseInfo. // Honor alignment typedef attributes even on incomplete types. // We also honor them straight for C++ class types, even as pointees; // there's an expressivity gap here. if (auto TT = T->getAs()) { if (auto Align = TT->getDecl()->getMaxAlignment()) { if (BaseInfo) *BaseInfo = LValueBaseInfo(AlignmentSource::AttributedType); return getContext().toCharUnitsFromBits(Align); } } bool AlignForArray = T->isArrayType(); // Analyze the base element type, so we don't get confused by incomplete // array types. T = getContext().getBaseElementType(T); if (T->isIncompleteType()) { // We could try to replicate the logic from // ASTContext::getTypeAlignIfKnown, but nothing uses the alignment if the // type is incomplete, so it's impossible to test. We could try to reuse // getTypeAlignIfKnown, but that doesn't return the information we need // to set BaseInfo. So just ignore the possibility that the alignment is // greater than one. 
if (BaseInfo) *BaseInfo = LValueBaseInfo(AlignmentSource::Type); return CharUnits::One(); } if (BaseInfo) *BaseInfo = LValueBaseInfo(AlignmentSource::Type); CharUnits Alignment; // For C++ class pointees, we don't know whether we're pointing at a // base or a complete object, so we generally need to use the // non-virtual alignment. const CXXRecordDecl *RD; if (forPointeeType && !AlignForArray && (RD = T->getAsCXXRecordDecl())) { Alignment = getClassPointerAlignment(RD); } else { Alignment = getContext().getTypeAlignInChars(T); if (T.getQualifiers().hasUnaligned()) Alignment = CharUnits::One(); } // Cap to the global maximum type alignment unless the alignment // was somehow explicit on the type. if (unsigned MaxAlign = getLangOpts().MaxTypeAlign) { if (Alignment.getQuantity() > MaxAlign && !getContext().isAlignmentRequired(T)) Alignment = CharUnits::fromQuantity(MaxAlign); } return Alignment; } bool CodeGenModule::stopAutoInit() { unsigned StopAfter = getContext().getLangOpts().TrivialAutoVarInitStopAfter; if (StopAfter) { // This number is positive only when -ftrivial-auto-var-init-stop-after=* is // used if (NumAutoVarInit >= StopAfter) { return true; } if (!NumAutoVarInit) { unsigned DiagID = getDiags().getCustomDiagID( DiagnosticsEngine::Warning, "-ftrivial-auto-var-init-stop-after=%0 has been enabled to limit the " "number of times ftrivial-auto-var-init=%1 gets applied."); getDiags().Report(DiagID) << StopAfter << (getContext().getLangOpts().getTrivialAutoVarInit() == LangOptions::TrivialAutoVarInitKind::Zero ? "zero" : "pattern"); } ++NumAutoVarInit; } return false; } diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 5ebf432a4cd3..2f3f4c281079 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -1,11187 +1,11180 @@ //===---- TargetInfo.cpp - Encapsulate target details -----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // These classes wrap the information about a call or function // definition used to handle ABI compliancy. // //===----------------------------------------------------------------------===// #include "TargetInfo.h" #include "ABIInfo.h" #include "CGBlocks.h" #include "CGCXXABI.h" #include "CGValue.h" #include "CodeGenFunction.h" #include "clang/AST/Attr.h" #include "clang/AST/RecordLayout.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/DiagnosticFrontend.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/CodeGen/SwiftCallingConv.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" #include // std::sort using namespace clang; using namespace CodeGen; // Helper for coercing an aggregate argument or return value into an integer // array of the same size (including padding) and alignment. This alternate // coercion happens only for the RenderScript ABI and can be removed after // runtimes that rely on it are no longer supported. 
//
// RenderScript assumes that the size of the argument / return value in the IR
// is the same as the size of the corresponding qualified type. This helper
// coerces the aggregate type into an array of the same size (including
// padding). This coercion is used in lieu of expansion of struct members or
// other canonical coercions that return a coerced-type of larger size.
//
// Ty          - The argument / return value type
// Context     - The associated ASTContext
// LLVMContext - The associated LLVMContext
static ABIArgInfo coerceToIntArray(QualType Ty, ASTContext &Context,
                                   llvm::LLVMContext &LLVMContext) {
  // Alignment and Size are measured in bits.
  const uint64_t Size = Context.getTypeSize(Ty);
  const uint64_t Alignment = Context.getTypeAlign(Ty);
  llvm::Type *IntType = llvm::Type::getIntNTy(LLVMContext, Alignment);
  const uint64_t NumElements = (Size + Alignment - 1) / Alignment;
  return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
}

static void AssignToArrayRange(CodeGen::CGBuilderTy &Builder,
                               llvm::Value *Array,
                               llvm::Value *Value,
                               unsigned FirstIndex,
                               unsigned LastIndex) {
  // Alternatively, we could emit this as a loop in the source.
  for (unsigned I = FirstIndex; I <= LastIndex; ++I) {
    llvm::Value *Cell =
        Builder.CreateConstInBoundsGEP1_32(Builder.getInt8Ty(), Array, I);
    Builder.CreateAlignedStore(Value, Cell, CharUnits::One());
  }
}

static bool isAggregateTypeForABI(QualType T) {
  return !CodeGenFunction::hasScalarEvaluationKind(T) ||
         T->isMemberFunctionPointerType();
}

ABIArgInfo ABIInfo::getNaturalAlignIndirect(QualType Ty, bool ByVal,
                                            bool Realign,
                                            llvm::Type *Padding) const {
  return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty), ByVal,
                                 Realign, Padding);
}

ABIArgInfo
ABIInfo::getNaturalAlignIndirectInReg(QualType Ty, bool Realign) const {
  return ABIArgInfo::getIndirectInReg(getContext().getTypeAlignInChars(Ty),
                                      /*ByVal*/ false, Realign);
}

Address ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                             QualType Ty) const {
  return Address::invalid();
}

bool ABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const {
  if (Ty->isPromotableIntegerType())
    return true;

  if (const auto *EIT = Ty->getAs<ExtIntType>())
    if (EIT->getNumBits() < getContext().getTypeSize(getContext().IntTy))
      return true;

  return false;
}

ABIInfo::~ABIInfo() {}

/// Does the given lowering require more than the given number of
/// registers when expanded?
///
/// This is intended to be the basis of a reasonable basic implementation
/// of should{Pass,Return}IndirectlyForSwift.
///
/// For most targets, a limit of four total registers is reasonable; this
/// limits the amount of code required in order to move around the value
/// in case it wasn't produced immediately prior to the call by the caller
/// (or wasn't produced in exactly the right registers) or isn't used
/// immediately within the callee.  But some targets may need to further
/// limit the register count due to an inability to support that many
/// return registers.
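/// Worked example (illustrative, 64-bit target, maxAllRegisters == 4): a value
/// expanded to { i8*, i8*, float, float } needs 2 integer + 2 FP registers and
/// is allowed, while { i8*, i64, i64, double, double } needs 3 + 2 and is not.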
static bool occupiesMoreThan(CodeGenTypes &cgt,
                             ArrayRef<llvm::Type *> scalarTypes,
                             unsigned maxAllRegisters) {
  unsigned intCount = 0, fpCount = 0;
  for (llvm::Type *type : scalarTypes) {
    if (type->isPointerTy()) {
      intCount++;
    } else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) {
      auto ptrWidth = cgt.getTarget().getPointerWidth(0);
      intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth;
    } else {
      assert(type->isVectorTy() || type->isFloatingPointTy());
      fpCount++;
    }
  }

  return (intCount + fpCount > maxAllRegisters);
}

bool SwiftABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize,
                                             llvm::Type *eltTy,
                                             unsigned numElts) const {
  // The default implementation of this assumes that the target guarantees
  // 128-bit SIMD support but nothing more.
  return (vectorSize.getQuantity() > 8 && vectorSize.getQuantity() <= 16);
}

static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT,
                                              CGCXXABI &CXXABI) {
  const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl());
  if (!RD) {
    if (!RT->getDecl()->canPassInRegisters())
      return CGCXXABI::RAA_Indirect;
    return CGCXXABI::RAA_Default;
  }
  return CXXABI.getRecordArgABI(RD);
}

static CGCXXABI::RecordArgABI getRecordArgABI(QualType T, CGCXXABI &CXXABI) {
  const RecordType *RT = T->getAs<RecordType>();
  if (!RT)
    return CGCXXABI::RAA_Default;
  return getRecordArgABI(RT, CXXABI);
}

static bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI,
                               const ABIInfo &Info) {
  QualType Ty = FI.getReturnType();

  if (const auto *RT = Ty->getAs<RecordType>())
    if (!isa<CXXRecordDecl>(RT->getDecl()) &&
        !RT->getDecl()->canPassInRegisters()) {
      FI.getReturnInfo() = Info.getNaturalAlignIndirect(Ty);
      return true;
    }

  return CXXABI.classifyReturnType(FI);
}

/// Pass transparent unions as if they were the type of the first element. Sema
/// should ensure that all elements of the union have the same "machine type".
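// For example (illustrative declarations only, not part of this patch), a
// parameter declared with the union type
//
//   typedef union __attribute__((transparent_union)) {
//     int        *AsIntPtr;
//     const void *AsVoidPtr;  // all members share one machine representation
//   } demo_arg_t;
//
// is lowered by the helper below exactly as if the parameter had the type of
// the first member, int *.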
static QualType useFirstFieldIfTransparentUnion(QualType Ty) { if (const RecordType *UT = Ty->getAsUnionType()) { const RecordDecl *UD = UT->getDecl(); if (UD->hasAttr()) { assert(!UD->field_empty() && "sema created an empty transparent union"); return UD->field_begin()->getType(); } } return Ty; } CGCXXABI &ABIInfo::getCXXABI() const { return CGT.getCXXABI(); } ASTContext &ABIInfo::getContext() const { return CGT.getContext(); } llvm::LLVMContext &ABIInfo::getVMContext() const { return CGT.getLLVMContext(); } const llvm::DataLayout &ABIInfo::getDataLayout() const { return CGT.getDataLayout(); } const TargetInfo &ABIInfo::getTarget() const { return CGT.getTarget(); } const CodeGenOptions &ABIInfo::getCodeGenOpts() const { return CGT.getCodeGenOpts(); } bool ABIInfo::isAndroid() const { return getTarget().getTriple().isAndroid(); } bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { return false; } bool ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, uint64_t Members) const { return false; } LLVM_DUMP_METHOD void ABIArgInfo::dump() const { raw_ostream &OS = llvm::errs(); OS << "(ABIArgInfo Kind="; switch (TheKind) { case Direct: OS << "Direct Type="; if (llvm::Type *Ty = getCoerceToType()) Ty->print(OS); else OS << "null"; break; case Extend: OS << "Extend"; break; case Ignore: OS << "Ignore"; break; case InAlloca: OS << "InAlloca Offset=" << getInAllocaFieldIndex(); break; case Indirect: OS << "Indirect Align=" << getIndirectAlign().getQuantity() << " ByVal=" << getIndirectByVal() << " Realign=" << getIndirectRealign(); break; case IndirectAliased: OS << "Indirect Align=" << getIndirectAlign().getQuantity() << " AadrSpace=" << getIndirectAddrSpace() << " Realign=" << getIndirectRealign(); break; case Expand: OS << "Expand"; break; case CoerceAndExpand: OS << "CoerceAndExpand Type="; getCoerceAndExpandType()->print(OS); break; } OS << ")\n"; } // Dynamically round a pointer up to a multiple of the given alignment. static llvm::Value *emitRoundPointerUpToAlignment(CodeGenFunction &CGF, llvm::Value *Ptr, CharUnits Align) { llvm::Value *PtrAsInt = Ptr; // OverflowArgArea = (OverflowArgArea + Align - 1) & -Align; PtrAsInt = CGF.Builder.CreatePtrToInt(PtrAsInt, CGF.IntPtrTy); PtrAsInt = CGF.Builder.CreateAdd(PtrAsInt, llvm::ConstantInt::get(CGF.IntPtrTy, Align.getQuantity() - 1)); PtrAsInt = CGF.Builder.CreateAnd(PtrAsInt, llvm::ConstantInt::get(CGF.IntPtrTy, -Align.getQuantity())); PtrAsInt = CGF.Builder.CreateIntToPtr(PtrAsInt, Ptr->getType(), Ptr->getName() + ".aligned"); return PtrAsInt; } /// Emit va_arg for a platform using the common void* representation, /// where arguments are simply emitted in an array of slots on the stack. /// /// This version implements the core direct-value passing rules. /// /// \param SlotSize - The size and alignment of a stack slot. /// Each argument will be allocated to a multiple of this number of /// slots, and all the slots will be aligned to this value. /// \param AllowHigherAlign - The slot alignment is not a cap; /// an argument type with an alignment greater than the slot size /// will be emitted on a higher-alignment address, potentially /// leaving one or more empty slots behind as padding. If this /// is false, the returned address might be less-aligned than /// DirectAlign. static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF, Address VAListAddr, llvm::Type *DirectTy, CharUnits DirectSize, CharUnits DirectAlign, CharUnits SlotSize, bool AllowHigherAlign) { // Cast the element type to i8* if necessary. 
Some platforms define // va_list as a struct containing an i8* instead of just an i8*. if (VAListAddr.getElementType() != CGF.Int8PtrTy) VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy); llvm::Value *Ptr = CGF.Builder.CreateLoad(VAListAddr, "argp.cur"); // If the CC aligns values higher than the slot size, do so if needed. Address Addr = Address::invalid(); if (AllowHigherAlign && DirectAlign > SlotSize) { Addr = Address(emitRoundPointerUpToAlignment(CGF, Ptr, DirectAlign), DirectAlign); } else { Addr = Address(Ptr, SlotSize); } // Advance the pointer past the argument, then store that back. CharUnits FullDirectSize = DirectSize.alignTo(SlotSize); Address NextPtr = CGF.Builder.CreateConstInBoundsByteGEP(Addr, FullDirectSize, "argp.next"); CGF.Builder.CreateStore(NextPtr.getPointer(), VAListAddr); // If the argument is smaller than a slot, and this is a big-endian // target, the argument will be right-adjusted in its slot. if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian() && !DirectTy->isStructTy()) { Addr = CGF.Builder.CreateConstInBoundsByteGEP(Addr, SlotSize - DirectSize); } Addr = CGF.Builder.CreateElementBitCast(Addr, DirectTy); return Addr; } /// Emit va_arg for a platform using the common void* representation, /// where arguments are simply emitted in an array of slots on the stack. /// /// \param IsIndirect - Values of this type are passed indirectly. /// \param ValueInfo - The size and alignment of this type, generally /// computed with getContext().getTypeInfoInChars(ValueTy). /// \param SlotSizeAndAlign - The size and alignment of a stack slot. /// Each argument will be allocated to a multiple of this number of /// slots, and all the slots will be aligned to this value. /// \param AllowHigherAlign - The slot alignment is not a cap; /// an argument type with an alignment greater than the slot size /// will be emitted on a higher-alignment address, potentially /// leaving one or more empty slots behind as padding. static Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType ValueTy, bool IsIndirect, std::pair ValueInfo, CharUnits SlotSizeAndAlign, bool AllowHigherAlign) { // The size and alignment of the value that was passed directly. CharUnits DirectSize, DirectAlign; if (IsIndirect) { DirectSize = CGF.getPointerSize(); DirectAlign = CGF.getPointerAlign(); } else { DirectSize = ValueInfo.first; DirectAlign = ValueInfo.second; } // Cast the address we've calculated to the right type. llvm::Type *DirectTy = CGF.ConvertTypeForMem(ValueTy); if (IsIndirect) DirectTy = DirectTy->getPointerTo(0); Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, DirectTy, DirectSize, DirectAlign, SlotSizeAndAlign, AllowHigherAlign); if (IsIndirect) { Addr = Address(CGF.Builder.CreateLoad(Addr), ValueInfo.second); } return Addr; } static Address emitMergePHI(CodeGenFunction &CGF, Address Addr1, llvm::BasicBlock *Block1, Address Addr2, llvm::BasicBlock *Block2, const llvm::Twine &Name = "") { assert(Addr1.getType() == Addr2.getType()); llvm::PHINode *PHI = CGF.Builder.CreatePHI(Addr1.getType(), 2, Name); PHI->addIncoming(Addr1.getPointer(), Block1); PHI->addIncoming(Addr2.getPointer(), Block2); CharUnits Align = std::min(Addr1.getAlignment(), Addr2.getAlignment()); return Address(PHI, Align); } TargetCodeGenInfo::~TargetCodeGenInfo() = default; // If someone can figure out a general rule for this, that would be great. // It's probably just doomed to be platform-dependent, though. 
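// A minimal standalone sketch of the void*-style va_arg slot walk emitted by
// emitVoidPtrDirectVAArg above, assuming byte addresses represented as plain
// integers and power-of-two slot sizes. DemoVAArgStep, demoRoundUp and
// demoVAArgStep are illustrative names only, not part of this patch.
struct DemoVAArgStep {
  unsigned long long ValueAddr; // where the value is read from
  unsigned long long NextArg;   // updated "current argument" pointer
};

// (P + Align - 1) & -Align, the same arithmetic as
// emitRoundPointerUpToAlignment; Align must be a power of two.
static unsigned long long demoRoundUp(unsigned long long P,
                                      unsigned long long Align) {
  return (P + Align - 1) & ~(Align - 1);
}

static DemoVAArgStep demoVAArgStep(unsigned long long ArgPtr,
                                   unsigned long long Size,
                                   unsigned long long Align,
                                   unsigned long long SlotSize,
                                   bool AllowHigherAlign, bool BigEndian) {
  // Overalign the slot only when the ABI permits it.
  unsigned long long Addr = (AllowHigherAlign && Align > SlotSize)
                                ? demoRoundUp(ArgPtr, Align)
                                : ArgPtr;
  // Advance by the value size rounded up to a whole number of slots.
  unsigned long long FullSize = demoRoundUp(Size, SlotSize);
  unsigned long long ValueAddr = Addr;
  // Small non-aggregate values are right-adjusted within their slot on
  // big-endian targets.
  if (BigEndian && Size < SlotSize)
    ValueAddr += SlotSize - Size;
  return {ValueAddr, Addr + FullSize};
}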
unsigned TargetCodeGenInfo::getSizeOfUnwindException() const { // Verified for: // x86-64 FreeBSD, Linux, Darwin // x86-32 FreeBSD, Linux, Darwin // PowerPC Linux, Darwin // ARM Darwin (*not* EABI) // AArch64 Linux return 32; } bool TargetCodeGenInfo::isNoProtoCallVariadic(const CallArgList &args, const FunctionNoProtoType *fnType) const { // The following conventions are known to require this to be false: // x86_stdcall // MIPS // For everything else, we just prefer false unless we opt out. return false; } void TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const { // This assumes the user is passing a library name like "rt" instead of a // filename like "librt.a/so", and that they don't care whether it's static or // dynamic. Opt = "-l"; Opt += Lib; } unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const { // OpenCL kernels are called via an explicit runtime API with arguments // set with clSetKernelArg(), not as normal sub-functions. // Return SPIR_KERNEL by default as the kernel calling convention to // ensure the fingerprint is fixed such way that each OpenCL argument // gets one matching argument in the produced kernel function argument // list to enable feasible implementation of clSetKernelArg() with // aggregates etc. In case we would use the default C calling conv here, // clSetKernelArg() might break depending on the target-specific // conventions; different targets might split structs passed as values // to multiple function arguments etc. return llvm::CallingConv::SPIR_KERNEL; } llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::PointerType *T, QualType QT) const { return llvm::ConstantPointerNull::get(T); } LangAS TargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const { assert(!CGM.getLangOpts().OpenCL && !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && "Address space agnostic languages only"); return D ? D->getType().getAddressSpace() : LangAS::Default; } llvm::Value *TargetCodeGenInfo::performAddrSpaceCast( CodeGen::CodeGenFunction &CGF, llvm::Value *Src, LangAS SrcAddr, LangAS DestAddr, llvm::Type *DestTy, bool isNonNull) const { // Since target may map different address spaces in AST to the same address // space, an address space conversion may end up as a bitcast. if (auto *C = dyn_cast(Src)) return performAddrSpaceCast(CGF.CGM, C, SrcAddr, DestAddr, DestTy); // Try to preserve the source's name to make IR more readable. return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( Src, DestTy, Src->hasName() ? Src->getName() + ".ascast" : ""); } llvm::Constant * TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src, LangAS SrcAddr, LangAS DestAddr, llvm::Type *DestTy) const { // Since target may map different address spaces in AST to the same address // space, an address space conversion may end up as a bitcast. return llvm::ConstantExpr::getPointerCast(Src, DestTy); } llvm::SyncScope::ID TargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts, SyncScope Scope, llvm::AtomicOrdering Ordering, llvm::LLVMContext &Ctx) const { return Ctx.getOrInsertSyncScopeID(""); /* default sync scope */ } static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays); /// isEmptyField - Return true iff a the field is "empty", that is it /// is an unnamed bit-field or an (array of) empty record(s). 
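// For instance (illustrative declarations only), given
//
//   struct DemoEmpty {};
//   struct DemoHolder {
//     int : 0;                               // unnamed bit-field: empty
//     DemoEmpty Plain;                       // C++ record field: not empty
//     [[no_unique_address]] DemoEmpty Tail;  // empty under the C++20 rule
//     DemoEmpty Arr[2];                      // array of records: not empty
//   };
//
// only the unnamed bit-field and the [[no_unique_address]] member are treated
// as empty by the predicate below, while the corresponding C cases (plain
// empty structs and arrays of them) do count as empty.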
static bool isEmptyField(ASTContext &Context, const FieldDecl *FD, bool AllowArrays) { if (FD->isUnnamedBitfield()) return true; QualType FT = FD->getType(); // Constant arrays of empty records count as empty, strip them off. // Constant arrays of zero length always count as empty. bool WasArray = false; if (AllowArrays) while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) { if (AT->getSize() == 0) return true; FT = AT->getElementType(); // The [[no_unique_address]] special case below does not apply to // arrays of C++ empty records, so we need to remember this fact. WasArray = true; } const RecordType *RT = FT->getAs(); if (!RT) return false; // C++ record fields are never empty, at least in the Itanium ABI. // // FIXME: We should use a predicate for whether this behavior is true in the // current ABI. // // The exception to the above rule are fields marked with the // [[no_unique_address]] attribute (since C++20). Those do count as empty // according to the Itanium ABI. The exception applies only to records, // not arrays of records, so we must also check whether we stripped off an // array type above. if (isa(RT->getDecl()) && (WasArray || !FD->hasAttr())) return false; return isEmptyRecord(Context, FT, AllowArrays); } /// isEmptyRecord - Return true iff a structure contains only empty /// fields. Note that a structure with a flexible array member is not /// considered empty. static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) { const RecordType *RT = T->getAs(); if (!RT) return false; const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return false; // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) for (const auto &I : CXXRD->bases()) if (!isEmptyRecord(Context, I.getType(), true)) return false; for (const auto *I : RD->fields()) if (!isEmptyField(Context, I, AllowArrays)) return false; return true; } /// isSingleElementStruct - Determine if a structure is a "single /// element struct", i.e. it has exactly one non-empty field or /// exactly one field which is itself a single element /// struct. Structures with flexible array members are never /// considered single element structs. /// /// \return The field declaration for the single non-empty field, if /// it exists. static const Type *isSingleElementStruct(QualType T, ASTContext &Context) { const RecordType *RT = T->getAs(); if (!RT) return nullptr; const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return nullptr; const Type *Found = nullptr; // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { for (const auto &I : CXXRD->bases()) { // Ignore empty records. if (isEmptyRecord(Context, I.getType(), true)) continue; // If we already found an element then this isn't a single-element struct. if (Found) return nullptr; // If this is non-empty and not a single element struct, the composite // cannot be a single element struct. Found = isSingleElementStruct(I.getType(), Context); if (!Found) return nullptr; } } // Check for single element. for (const auto *FD : RD->fields()) { QualType FT = FD->getType(); // Ignore empty fields. if (isEmptyField(Context, FD, true)) continue; // If we already found an element then this isn't a single-element // struct. if (Found) return nullptr; // Treat single element arrays as the element. 
while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) { if (AT->getSize().getZExtValue() != 1) break; FT = AT->getElementType(); } if (!isAggregateTypeForABI(FT)) { Found = FT.getTypePtr(); } else { Found = isSingleElementStruct(FT, Context); if (!Found) return nullptr; } } // We don't consider a struct a single-element struct if it has // padding beyond the element type. if (Found && Context.getTypeSize(Found) != Context.getTypeSize(T)) return nullptr; return Found; } namespace { Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty, const ABIArgInfo &AI) { // This default implementation defers to the llvm backend's va_arg // instruction. It can handle only passing arguments directly // (typically only handled in the backend for primitive types), or // aggregates passed indirectly by pointer (NOTE: if the "byval" // flag has ABI impact in the callee, this implementation cannot // work.) // Only a few cases are covered here at the moment -- those needed // by the default abi. llvm::Value *Val; if (AI.isIndirect()) { assert(!AI.getPaddingType() && "Unexpected PaddingType seen in arginfo in generic VAArg emitter!"); assert( !AI.getIndirectRealign() && "Unexpected IndirectRealign seen in arginfo in generic VAArg emitter!"); auto TyInfo = CGF.getContext().getTypeInfoInChars(Ty); CharUnits TyAlignForABI = TyInfo.second; llvm::Type *BaseTy = llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty)); llvm::Value *Addr = CGF.Builder.CreateVAArg(VAListAddr.getPointer(), BaseTy); return Address(Addr, TyAlignForABI); } else { assert((AI.isDirect() || AI.isExtend()) && "Unexpected ArgInfo Kind in generic VAArg emitter!"); assert(!AI.getInReg() && "Unexpected InReg seen in arginfo in generic VAArg emitter!"); assert(!AI.getPaddingType() && "Unexpected PaddingType seen in arginfo in generic VAArg emitter!"); assert(!AI.getDirectOffset() && "Unexpected DirectOffset seen in arginfo in generic VAArg emitter!"); assert(!AI.getCoerceToType() && "Unexpected CoerceToType seen in arginfo in generic VAArg emitter!"); Address Temp = CGF.CreateMemTemp(Ty, "varet"); Val = CGF.Builder.CreateVAArg(VAListAddr.getPointer(), CGF.ConvertType(Ty)); CGF.Builder.CreateStore(Val, Temp); return Temp; } } /// DefaultABIInfo - The default implementation for ABI specific /// details. This implementation provides information which results in /// self-consistent and sensible LLVM IR generation, but does not /// conform to any particular ABI. class DefaultABIInfo : public ABIInfo { public: DefaultABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {} ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy) const; void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type); } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override { return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty)); } }; class DefaultTargetCodeGenInfo : public TargetCodeGenInfo { public: DefaultTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(std::make_unique(CGT)) {} }; ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const { Ty = useFirstFieldIfTransparentUnion(Ty); if (isAggregateTypeForABI(Ty)) { // Records with non-trivial destructors/copy-constructors should not be // passed by value. 
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); return getNaturalAlignIndirect(Ty); } // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); ASTContext &Context = getContext(); if (const auto *EIT = Ty->getAs()) if (EIT->getNumBits() > Context.getTypeSize(Context.getTargetInfo().hasInt128Type() ? Context.Int128Ty : Context.LongLongTy)) return getNaturalAlignIndirect(Ty); return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) : ABIArgInfo::getDirect()); } ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); if (isAggregateTypeForABI(RetTy)) return getNaturalAlignIndirect(RetTy); // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); if (const auto *EIT = RetTy->getAs()) if (EIT->getNumBits() > getContext().getTypeSize(getContext().getTargetInfo().hasInt128Type() ? getContext().Int128Ty : getContext().LongLongTy)) return getNaturalAlignIndirect(RetTy); return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) : ABIArgInfo::getDirect()); } //===----------------------------------------------------------------------===// // WebAssembly ABI Implementation // // This is a very simple ABI that relies a lot on DefaultABIInfo. //===----------------------------------------------------------------------===// class WebAssemblyABIInfo final : public SwiftABIInfo { public: enum ABIKind { MVP = 0, ExperimentalMV = 1, }; private: DefaultABIInfo defaultInfo; ABIKind Kind; public: explicit WebAssemblyABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind) : SwiftABIInfo(CGT), defaultInfo(CGT), Kind(Kind) {} private: ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty) const; // DefaultABIInfo's classifyReturnType and classifyArgumentType are // non-virtual, but computeInfo and EmitVAArg are virtual, so we // overload them. 
void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &Arg : FI.arguments()) Arg.info = classifyArgumentType(Arg.type); } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; bool shouldPassIndirectlyForSwift(ArrayRef scalars, bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } bool isSwiftErrorInRegister() const override { return false; } }; class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo { public: explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, WebAssemblyABIInfo::ABIKind K) : TargetCodeGenInfo(std::make_unique(CGT, K)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); if (const auto *FD = dyn_cast_or_null(D)) { if (const auto *Attr = FD->getAttr()) { llvm::Function *Fn = cast(GV); llvm::AttrBuilder B; B.addAttribute("wasm-import-module", Attr->getImportModule()); Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); } if (const auto *Attr = FD->getAttr()) { llvm::Function *Fn = cast(GV); llvm::AttrBuilder B; B.addAttribute("wasm-import-name", Attr->getImportName()); Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); } if (const auto *Attr = FD->getAttr()) { llvm::Function *Fn = cast(GV); llvm::AttrBuilder B; B.addAttribute("wasm-export-name", Attr->getExportName()); Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); } } if (auto *FD = dyn_cast_or_null(D)) { llvm::Function *Fn = cast(GV); if (!FD->doesThisDeclarationHaveABody() && !FD->hasPrototype()) Fn->addFnAttr("no-prototype"); } } }; /// Classify argument of given type \p Ty. ABIArgInfo WebAssemblyABIInfo::classifyArgumentType(QualType Ty) const { Ty = useFirstFieldIfTransparentUnion(Ty); if (isAggregateTypeForABI(Ty)) { // Records with non-trivial destructors/copy-constructors should not be // passed by value. if (auto RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); // Ignore empty structs/unions. if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); // Lower single-element structs to just pass a regular value. TODO: We // could do reasonable-size multiple-element structs too, using getExpand(), // though watch out for things like bitfields. if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); // For the experimental multivalue ABI, fully expand all other aggregates if (Kind == ABIKind::ExperimentalMV) { const RecordType *RT = Ty->getAs(); assert(RT); bool HasBitField = false; for (auto *Field : RT->getDecl()->fields()) { if (Field->isBitField()) { HasBitField = true; break; } } if (!HasBitField) return ABIArgInfo::getExpand(); } } // Otherwise just do the default thing. return defaultInfo.classifyArgumentType(Ty); } ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const { if (isAggregateTypeForABI(RetTy)) { // Records with non-trivial destructors/copy-constructors should not be // returned by value. if (!getRecordArgABI(RetTy, getCXXABI())) { // Ignore empty structs/unions. if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); // Lower single-element structs to just return a regular value. 
TODO: We // could do reasonable-size multiple-element structs too, using // ABIArgInfo::getDirect(). if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext())) return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); // For the experimental multivalue ABI, return all other aggregates if (Kind == ABIKind::ExperimentalMV) return ABIArgInfo::getDirect(); } } // Otherwise just do the default thing. return defaultInfo.classifyReturnType(RetTy); } Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { bool IsIndirect = isAggregateTypeForABI(Ty) && !isEmptyRecord(getContext(), Ty, true) && !isSingleElementStruct(Ty, getContext()); return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, getContext().getTypeInfoInChars(Ty), CharUnits::fromQuantity(4), /*AllowHigherAlign=*/true); } //===----------------------------------------------------------------------===// // le32/PNaCl bitcode ABI Implementation // // This is a simplified version of the x86_32 ABI. Arguments and return values // are always passed on the stack. //===----------------------------------------------------------------------===// class PNaClABIInfo : public ABIInfo { public: PNaClABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {} ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class PNaClTargetCodeGenInfo : public TargetCodeGenInfo { public: PNaClTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(std::make_unique(CGT)) {} }; void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type); } Address PNaClABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { // The PNaCL ABI is a bit odd, in that varargs don't use normal // function classification. Structs get passed directly for varargs // functions, through a rewriting transform in // pnacl-llvm/lib/Transforms/NaCl/ExpandVarArgs.cpp, which allows // this target to actually support a va_arg instructions with an // aggregate type, unlike other targets. return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()); } /// Classify argument of given type \p Ty. ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const { if (isAggregateTypeForABI(Ty)) { if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); return getNaturalAlignIndirect(Ty); } else if (const EnumType *EnumTy = Ty->getAs()) { // Treat an enum type as its underlying type. Ty = EnumTy->getDecl()->getIntegerType(); } else if (Ty->isFloatingType()) { // Floating-point types don't go inreg. return ABIArgInfo::getDirect(); } else if (const auto *EIT = Ty->getAs()) { // Treat extended integers as integers if <=64, otherwise pass indirectly. if (EIT->getNumBits() > 64) return getNaturalAlignIndirect(Ty); return ABIArgInfo::getDirect(); } return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) : ABIArgInfo::getDirect()); } ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // In the PNaCl ABI we always return records/structures on the stack. 
if (isAggregateTypeForABI(RetTy)) return getNaturalAlignIndirect(RetTy); // Treat extended integers as integers if <=64, otherwise pass indirectly. if (const auto *EIT = RetTy->getAs()) { if (EIT->getNumBits() > 64) return getNaturalAlignIndirect(RetTy); return ABIArgInfo::getDirect(); } // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) : ABIArgInfo::getDirect()); } /// IsX86_MMXType - Return true if this is an MMX type. bool IsX86_MMXType(llvm::Type *IRType) { // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>. return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 && cast(IRType)->getElementType()->isIntegerTy() && IRType->getScalarSizeInBits() != 64; } static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, StringRef Constraint, llvm::Type* Ty) { bool IsMMXCons = llvm::StringSwitch(Constraint) .Cases("y", "&y", "^Ym", true) .Default(false); if (IsMMXCons && Ty->isVectorTy()) { if (cast(Ty)->getPrimitiveSizeInBits().getFixedSize() != 64) { // Invalid MMX constraint return nullptr; } return llvm::Type::getX86_MMXTy(CGF.getLLVMContext()); } // No operation needed return Ty; } /// Returns true if this type can be passed in SSE registers with the /// X86_VectorCall calling convention. Shared between x86_32 and x86_64. static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) { if (const BuiltinType *BT = Ty->getAs()) { if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) { if (BT->getKind() == BuiltinType::LongDouble) { if (&Context.getTargetInfo().getLongDoubleFormat() == &llvm::APFloat::x87DoubleExtended()) return false; } return true; } } else if (const VectorType *VT = Ty->getAs()) { // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX // registers specially. unsigned VecSize = Context.getTypeSize(VT); if (VecSize == 128 || VecSize == 256 || VecSize == 512) return true; } return false; } /// Returns true if this aggregate is small enough to be passed in SSE registers /// in the X86_VectorCall calling convention. Shared between x86_32 and x86_64. static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) { return NumMembers <= 4; } /// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86. static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) { auto AI = ABIArgInfo::getDirect(T); AI.setInReg(true); AI.setCanBeFlattened(false); return AI; } //===----------------------------------------------------------------------===// // X86-32 ABI Implementation //===----------------------------------------------------------------------===// /// Similar to llvm::CCState, but for Clang. struct CCState { CCState(CGFunctionInfo &FI) : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()) {} llvm::SmallBitVector IsPreassigned; unsigned CC = CallingConv::CC_C; unsigned FreeRegs = 0; unsigned FreeSSERegs = 0; }; enum { // Vectorcall only allows the first 6 parameters to be passed in registers. VectorcallMaxParamNumAsReg = 6 }; /// X86_32ABIInfo - The X86-32 ABI information. 
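// A minimal standalone sketch of the vectorcall SSE-eligibility test above
// (isX86VectorTypeForVectorCall), assuming the x87 form of long double:
// vectors qualify at XMM/YMM/ZMM sizes, and scalars qualify only for float
// and double, not for half or x87 long double. DemoScalarKind and
// demoIsVectorCallSSEType are illustrative names only.
enum class DemoScalarKind { Float, Double, LongDoubleX87, Half, Integer };

static bool demoIsVectorCallSSEType(bool IsVector, unsigned SizeInBits,
                                    DemoScalarKind Kind) {
  if (IsVector)
    return SizeInBits == 128 || SizeInBits == 256 || SizeInBits == 512;
  return Kind == DemoScalarKind::Float || Kind == DemoScalarKind::Double;
}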
class X86_32ABIInfo : public SwiftABIInfo { enum Class { Integer, Float }; static const unsigned MinABIStackAlignInBytes = 4; bool IsDarwinVectorABI; bool IsRetSmallStructInRegABI; bool IsWin32StructABI; bool IsSoftFloatABI; bool IsMCUABI; unsigned DefaultNumRegisterParameters; static bool isRegisterSize(unsigned Size) { return (Size == 8 || Size == 16 || Size == 32 || Size == 64); } bool isHomogeneousAggregateBaseType(QualType Ty) const override { // FIXME: Assumes vectorcall is in use. return isX86VectorTypeForVectorCall(getContext(), Ty); } bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t NumMembers) const override { // FIXME: Assumes vectorcall is in use. return isX86VectorCallAggregateSmallEnough(NumMembers); } bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const; /// getIndirectResult - Give a source type \arg Ty, return a suitable result /// such that the argument will be passed in memory. ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const; ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const; /// Return the alignment to use for the given type on the stack. unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const; Class classify(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const; ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; /// Updates the number of available free registers, returns /// true if any registers were allocated. bool updateFreeRegs(QualType Ty, CCState &State) const; bool shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg, bool &NeedsPadding) const; bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const; bool canExpandIndirectArgument(QualType Ty) const; /// Rewrite the function info so that all memory arguments use /// inalloca. void rewriteWithInAlloca(CGFunctionInfo &FI) const; void addFieldToArgStruct(SmallVector &FrameFields, CharUnits &StackOffset, ABIArgInfo &Info, QualType Type) const; void runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const; public: void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, unsigned NumRegisterParameters, bool SoftFloatABI) : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), IsRetSmallStructInRegABI(RetSmallStructInRegABI), IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI), IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), DefaultNumRegisterParameters(NumRegisterParameters) {} bool shouldPassIndirectlyForSwift(ArrayRef scalars, bool asReturnValue) const override { // LLVM's x86-32 lowering currently only assigns up to three // integer registers and three fp registers. Oddly, it'll use up to // four vector registers for vectors, but those can overlap with the // scalar registers. return occupiesMoreThan(CGT, scalars, /*total*/ 3); } bool isSwiftErrorInRegister() const override { // x86-32 lowering does not support passing swifterror in a register. 
return false; } }; class X86_32TargetCodeGenInfo : public TargetCodeGenInfo { public: X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, unsigned NumRegisterParameters, bool SoftFloatABI) : TargetCodeGenInfo(std::make_unique( CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI, NumRegisterParameters, SoftFloatABI)) {} static bool isStructReturnInRegABI( const llvm::Triple &Triple, const CodeGenOptions &Opts); void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { // Darwin uses different dwarf register numbers for EH. if (CGM.getTarget().getTriple().isOSDarwin()) return 5; return 4; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF, StringRef Constraint, llvm::Type* Ty) const override { return X86AdjustInlineAsmType(CGF, Constraint, Ty); } void addReturnRegisterOutputs(CodeGenFunction &CGF, LValue ReturnValue, std::string &Constraints, std::vector &ResultRegTypes, std::vector &ResultTruncRegTypes, std::vector &ResultRegDests, std::string &AsmString, unsigned NumOutputs) const override; llvm::Constant * getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override { unsigned Sig = (0xeb << 0) | // jmp rel8 (0x06 << 8) | // .+0x08 ('v' << 16) | ('2' << 24); return llvm::ConstantInt::get(CGM.Int32Ty, Sig); } StringRef getARCRetainAutoreleasedReturnValueMarker() const override { return "movl\t%ebp, %ebp" "\t\t// marker for objc_retainAutoreleaseReturnValue"; } }; } /// Rewrite input constraint references after adding some output constraints. /// In the case where there is one output and one input and we add one output, /// we need to replace all operand references greater than or equal to 1: /// mov $0, $1 /// mov eax, $1 /// The result will be: /// mov $0, $2 /// mov eax, $2 static void rewriteInputConstraintReferences(unsigned FirstIn, unsigned NumNewOuts, std::string &AsmString) { std::string Buf; llvm::raw_string_ostream OS(Buf); size_t Pos = 0; while (Pos < AsmString.size()) { size_t DollarStart = AsmString.find('$', Pos); if (DollarStart == std::string::npos) DollarStart = AsmString.size(); size_t DollarEnd = AsmString.find_first_not_of('$', DollarStart); if (DollarEnd == std::string::npos) DollarEnd = AsmString.size(); OS << StringRef(&AsmString[Pos], DollarEnd - Pos); Pos = DollarEnd; size_t NumDollars = DollarEnd - DollarStart; if (NumDollars % 2 != 0 && Pos < AsmString.size()) { // We have an operand reference. size_t DigitStart = Pos; if (AsmString[DigitStart] == '{') { OS << '{'; ++DigitStart; } size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart); if (DigitEnd == std::string::npos) DigitEnd = AsmString.size(); StringRef OperandStr(&AsmString[DigitStart], DigitEnd - DigitStart); unsigned OperandIndex; if (!OperandStr.getAsInteger(10, OperandIndex)) { if (OperandIndex >= FirstIn) OperandIndex += NumNewOuts; OS << OperandIndex; } else { OS << OperandStr; } Pos = DigitEnd; } } AsmString = std::move(OS.str()); } /// Add output constraints for EAX:EDX because they are return registers. 
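// A minimal standalone sketch of the constraint selection performed below:
// return values of 32 bits or fewer are read back through "={eax}", wider
// ones through "=A", which names the EAX:EDX pair. After such an output is
// appended, existing operand references in the asm string are renumbered as
// described above (e.g. "$1" becomes "$2"). demoReturnRegConstraint is an
// illustrative name only, not part of this patch.
static const char *demoReturnRegConstraint(unsigned RetWidthInBits) {
  return RetWidthInBits <= 32 ? "={eax}" : "=A";
}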
void X86_32TargetCodeGenInfo::addReturnRegisterOutputs( CodeGenFunction &CGF, LValue ReturnSlot, std::string &Constraints, std::vector &ResultRegTypes, std::vector &ResultTruncRegTypes, std::vector &ResultRegDests, std::string &AsmString, unsigned NumOutputs) const { uint64_t RetWidth = CGF.getContext().getTypeSize(ReturnSlot.getType()); // Use the EAX constraint if the width is 32 or smaller and EAX:EDX if it is // larger. if (!Constraints.empty()) Constraints += ','; if (RetWidth <= 32) { Constraints += "={eax}"; ResultRegTypes.push_back(CGF.Int32Ty); } else { // Use the 'A' constraint for EAX:EDX. Constraints += "=A"; ResultRegTypes.push_back(CGF.Int64Ty); } // Truncate EAX or EAX:EDX to an integer of the appropriate size. llvm::Type *CoerceTy = llvm::IntegerType::get(CGF.getLLVMContext(), RetWidth); ResultTruncRegTypes.push_back(CoerceTy); // Coerce the integer by bitcasting the return slot pointer. ReturnSlot.setAddress(CGF.Builder.CreateBitCast(ReturnSlot.getAddress(CGF), CoerceTy->getPointerTo())); ResultRegDests.push_back(ReturnSlot); rewriteInputConstraintReferences(NumOutputs, 1, AsmString); } /// shouldReturnTypeInRegister - Determine if the given type should be /// returned in a register (for the Darwin and MCU ABI). bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const { uint64_t Size = Context.getTypeSize(Ty); // For i386, type must be register sized. // For the MCU ABI, it only needs to be <= 8-byte if ((IsMCUABI && Size > 64) || (!IsMCUABI && !isRegisterSize(Size))) return false; if (Ty->isVectorType()) { // 64- and 128- bit vectors inside structures are not returned in // registers. if (Size == 64 || Size == 128) return false; return true; } // If this is a builtin, pointer, enum, complex type, member pointer, or // member function pointer it is ok. if (Ty->getAs() || Ty->hasPointerRepresentation() || Ty->isAnyComplexType() || Ty->isEnumeralType() || Ty->isBlockPointerType() || Ty->isMemberPointerType()) return true; // Arrays are treated like records. if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) return shouldReturnTypeInRegister(AT->getElementType(), Context); // Otherwise, it must be a record type. const RecordType *RT = Ty->getAs(); if (!RT) return false; // FIXME: Traverse bases here too. // Structure types are passed in register if all fields would be // passed in a register. for (const auto *FD : RT->getDecl()->fields()) { // Empty fields are ignored. if (isEmptyField(Context, FD, true)) continue; // Check fields recursively. if (!shouldReturnTypeInRegister(FD->getType(), Context)) return false; } return true; } static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { // Treat complex types as the element type. if (const ComplexType *CTy = Ty->getAs()) Ty = CTy->getElementType(); // Check for a type which we know has a simple scalar argument-passing // convention without any padding. (We're specifically looking for 32 // and 64-bit integer and integer-equivalents, float, and double.) if (!Ty->getAs() && !Ty->hasPointerRepresentation() && !Ty->isEnumeralType() && !Ty->isBlockPointerType()) return false; uint64_t Size = Context.getTypeSize(Ty); return Size == 32 || Size == 64; } static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD, uint64_t &Size) { for (const auto *FD : RD->fields()) { // Scalar arguments on the stack get 4 byte alignment on x86. If the // argument is smaller than 32-bits, expanding the struct will create // alignment padding. 
if (!is32Or64BitBasicType(FD->getType(), Context)) return false; // FIXME: Reject bit-fields wholesale; there are two problems, we don't know // how to expand them yet, and the predicate for telling if a bitfield still // counts as "basic" is more complicated than what we were doing previously. if (FD->isBitField()) return false; Size += Context.getTypeSize(FD->getType()); } return true; } static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD, uint64_t &Size) { // Don't do this if there are any non-empty bases. for (const CXXBaseSpecifier &Base : RD->bases()) { if (!addBaseAndFieldSizes(Context, Base.getType()->getAsCXXRecordDecl(), Size)) return false; } if (!addFieldSizes(Context, RD, Size)) return false; return true; } /// Test whether an argument type which is to be passed indirectly (on the /// stack) would have the equivalent layout if it was expanded into separate /// arguments. If so, we prefer to do the latter to avoid inhibiting /// optimizations. bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const { // We can only expand structure types. const RecordType *RT = Ty->getAs(); if (!RT) return false; const RecordDecl *RD = RT->getDecl(); uint64_t Size = 0; if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { if (!IsWin32StructABI) { // On non-Windows, we have to conservatively match our old bitcode // prototypes in order to be ABI-compatible at the bitcode level. if (!CXXRD->isCLike()) return false; } else { // Don't do this for dynamic classes. if (CXXRD->isDynamicClass()) return false; } if (!addBaseAndFieldSizes(getContext(), CXXRD, Size)) return false; } else { if (!addFieldSizes(getContext(), RD, Size)) return false; } // We can do this if there was no alignment padding. return Size == getContext().getTypeSize(Ty); } ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const { // If the return value is indirect, then the hidden argument is consuming one // integer register. if (State.FreeRegs) { --State.FreeRegs; if (!IsMCUABI) return getNaturalAlignIndirectInReg(RetTy); } return getNaturalAlignIndirect(RetTy, /*ByVal=*/false); } ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, CCState &State) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); const Type *Base = nullptr; uint64_t NumElts = 0; if ((State.CC == llvm::CallingConv::X86_VectorCall || State.CC == llvm::CallingConv::X86_RegCall) && isHomogeneousAggregate(RetTy, Base, NumElts)) { // The LLVM struct type for such an aggregate should lower properly. return ABIArgInfo::getDirect(); } if (const VectorType *VT = RetTy->getAs()) { // On Darwin, some vectors are returned in registers. if (IsDarwinVectorABI) { uint64_t Size = getContext().getTypeSize(RetTy); // 128-bit vectors are a special case; they are returned in // registers and we need to make sure to pick a type the LLVM // backend will like. if (Size == 128) return ABIArgInfo::getDirect(llvm::FixedVectorType::get( llvm::Type::getInt64Ty(getVMContext()), 2)); // Always return in register if it fits in a general purpose // register, or if it is 64 bits and has a single element. if ((Size == 8 || Size == 16 || Size == 32) || (Size == 64 && VT->getNumElements() == 1)) return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); return getIndirectReturnResult(RetTy, State); } return ABIArgInfo::getDirect(); } if (isAggregateTypeForABI(RetTy)) { if (const RecordType *RT = RetTy->getAs()) { // Structures with flexible arrays are always indirect. 
if (RT->getDecl()->hasFlexibleArrayMember()) return getIndirectReturnResult(RetTy, State); } // If specified, structs and unions are always indirect. if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType()) return getIndirectReturnResult(RetTy, State); // Ignore empty structs/unions. if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); // Small structures which are register sized are generally returned // in a register. if (shouldReturnTypeInRegister(RetTy, getContext())) { uint64_t Size = getContext().getTypeSize(RetTy); // As a special-case, if the struct is a "single-element" struct, and // the field is of type "float" or "double", return it in a // floating-point register. (MSVC does not apply this special case.) // We apply a similar transformation for pointer types to improve the // quality of the generated IR. if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext())) if ((!IsWin32StructABI && SeltTy->isRealFloatingType()) || SeltTy->hasPointerRepresentation()) return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); // FIXME: We should be able to narrow this integer in cases with dead // padding. return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),Size)); } return getIndirectReturnResult(RetTy, State); } // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); if (const auto *EIT = RetTy->getAs()) if (EIT->getNumBits() > 64) return getIndirectReturnResult(RetTy, State); return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) : ABIArgInfo::getDirect()); } static bool isSIMDVectorType(ASTContext &Context, QualType Ty) { return Ty->getAs() && Context.getTypeSize(Ty) == 128; } static bool isRecordWithSIMDVectorType(ASTContext &Context, QualType Ty) { const RecordType *RT = Ty->getAs(); if (!RT) return 0; const RecordDecl *RD = RT->getDecl(); // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) for (const auto &I : CXXRD->bases()) if (!isRecordWithSIMDVectorType(Context, I.getType())) return false; for (const auto *i : RD->fields()) { QualType FT = i->getType(); if (isSIMDVectorType(Context, FT)) return true; if (isRecordWithSIMDVectorType(Context, FT)) return true; } return false; } unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty, unsigned Align) const { // Otherwise, if the alignment is less than or equal to the minimum ABI // alignment, just use the default; the backend will handle this. if (Align <= MinABIStackAlignInBytes) return 0; // Use default alignment. // On non-Darwin, the stack type alignment is always 4. if (!IsDarwinVectorABI) { // Set explicit alignment, since we may need to realign the top. return MinABIStackAlignInBytes; } // Otherwise, if the type contains an SSE vector type, the alignment is 16. if (Align >= 16 && (isSIMDVectorType(getContext(), Ty) || isRecordWithSIMDVectorType(getContext(), Ty))) return 16; return MinABIStackAlignInBytes; } ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal, CCState &State) const { if (!ByVal) { if (State.FreeRegs) { --State.FreeRegs; // Non-byval indirects just use one pointer. if (!IsMCUABI) return getNaturalAlignIndirectInReg(Ty); } return getNaturalAlignIndirect(Ty, false); } // Compute the byval alignment. 
unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign); if (StackAlign == 0) return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true); // If the stack alignment is less than the type alignment, realign the // argument. bool Realign = TypeAlign > StackAlign; return ABIArgInfo::getIndirect(CharUnits::fromQuantity(StackAlign), /*ByVal=*/true, Realign); } X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const { const Type *T = isSingleElementStruct(Ty, getContext()); if (!T) T = Ty.getTypePtr(); if (const BuiltinType *BT = T->getAs()) { BuiltinType::Kind K = BT->getKind(); if (K == BuiltinType::Float || K == BuiltinType::Double) return Float; } return Integer; } bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const { if (!IsSoftFloatABI) { Class C = classify(Ty); if (C == Float) return false; } unsigned Size = getContext().getTypeSize(Ty); unsigned SizeInRegs = (Size + 31) / 32; if (SizeInRegs == 0) return false; if (!IsMCUABI) { if (SizeInRegs > State.FreeRegs) { State.FreeRegs = 0; return false; } } else { // The MCU psABI allows passing parameters in-reg even if there are // earlier parameters that are passed on the stack. Also, // it does not allow passing >8-byte structs in-register, // even if there are 3 free registers available. if (SizeInRegs > State.FreeRegs || SizeInRegs > 2) return false; } State.FreeRegs -= SizeInRegs; return true; } bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg, bool &NeedsPadding) const { // On Windows, aggregates other than HFAs are never passed in registers, and // they do not consume register slots. Homogenous floating-point aggregates // (HFAs) have already been dealt with at this point. if (IsWin32StructABI && isAggregateTypeForABI(Ty)) return false; NeedsPadding = false; InReg = !IsMCUABI; if (!updateFreeRegs(Ty, State)) return false; if (IsMCUABI) return true; if (State.CC == llvm::CallingConv::X86_FastCall || State.CC == llvm::CallingConv::X86_VectorCall || State.CC == llvm::CallingConv::X86_RegCall) { if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs) NeedsPadding = true; return false; } return true; } bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const { if (!updateFreeRegs(Ty, State)) return false; if (IsMCUABI) return false; if (State.CC == llvm::CallingConv::X86_FastCall || State.CC == llvm::CallingConv::X86_VectorCall || State.CC == llvm::CallingConv::X86_RegCall) { if (getContext().getTypeSize(Ty) > 32) return false; return (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() || Ty->isReferenceType()); } return true; } void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const { // Vectorcall x86 works subtly different than in x64, so the format is // a bit different than the x64 version. First, all vector types (not HVAs) // are assigned, with the first 6 ending up in the [XYZ]MM0-5 registers. // This differs from the x64 implementation, where the first 6 by INDEX get // registers. // In the second pass over the arguments, HVAs are passed in the remaining // vector registers if possible, or indirectly by address. The address will be // passed in ECX/EDX if available. Any other arguments are passed according to // the usual fastcall rules. 
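  // For example (illustrative only): given six __m128 arguments and one HVA
  // of four floats, this first pass hands the six __m128 values XMM0-XMM5; by
  // the time the HVA is classified no vector registers remain, so it is
  // passed indirectly by address, with that address placed in ECX or EDX when
  // one of them is still free.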
MutableArrayRef Args = FI.arguments(); for (int I = 0, E = Args.size(); I < E; ++I) { const Type *Base = nullptr; uint64_t NumElts = 0; const QualType &Ty = Args[I].type; if ((Ty->isVectorType() || Ty->isBuiltinType()) && isHomogeneousAggregate(Ty, Base, NumElts)) { if (State.FreeSSERegs >= NumElts) { State.FreeSSERegs -= NumElts; Args[I].info = ABIArgInfo::getDirectInReg(); State.IsPreassigned.set(I); } } } } ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State) const { // FIXME: Set alignment on indirect arguments. bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall; bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall; bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall; Ty = useFirstFieldIfTransparentUnion(Ty); TypeInfo TI = getContext().getTypeInfo(Ty); // Check with the C++ ABI first. const RecordType *RT = Ty->getAs(); if (RT) { CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); if (RAA == CGCXXABI::RAA_Indirect) { return getIndirectResult(Ty, false, State); } else if (RAA == CGCXXABI::RAA_DirectInMemory) { // The field index doesn't matter, we'll fix it up later. return ABIArgInfo::getInAlloca(/*FieldIndex=*/0); } } // Regcall uses the concept of a homogenous vector aggregate, similar // to other targets. const Type *Base = nullptr; uint64_t NumElts = 0; if ((IsRegCall || IsVectorCall) && isHomogeneousAggregate(Ty, Base, NumElts)) { if (State.FreeSSERegs >= NumElts) { State.FreeSSERegs -= NumElts; // Vectorcall passes HVAs directly and does not flatten them, but regcall // does. if (IsVectorCall) return getDirectX86Hva(); if (Ty->isBuiltinType() || Ty->isVectorType()) return ABIArgInfo::getDirect(); return ABIArgInfo::getExpand(); } return getIndirectResult(Ty, /*ByVal=*/false, State); } if (isAggregateTypeForABI(Ty)) { // Structures with flexible arrays are always indirect. // FIXME: This should not be byval! if (RT && RT->getDecl()->hasFlexibleArrayMember()) return getIndirectResult(Ty, true, State); // Ignore empty structs/unions on non-Windows. if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); llvm::LLVMContext &LLVMContext = getVMContext(); llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); bool NeedsPadding = false; bool InReg; if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) { unsigned SizeInRegs = (TI.Width + 31) / 32; SmallVector Elements(SizeInRegs, Int32); llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); if (InReg) return ABIArgInfo::getDirectInReg(Result); else return ABIArgInfo::getDirect(Result); } llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr; // Pass over-aligned aggregates on Windows indirectly. This behavior was // added in MSVC 2015. if (IsWin32StructABI && TI.AlignIsRequired && TI.Align > 32) return getIndirectResult(Ty, /*ByVal=*/false, State); // Expand small (<= 128-bit) record types when we know that the stack layout // of those arguments will match the struct. This is important because the // LLVM backend isn't smart enough to remove byval, which inhibits many // optimizations. // Don't do this for the MCU if there are still free integer registers // (see X86_64 ABI for full explanation). 
if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty)) return ABIArgInfo::getExpandWithPadding( IsFastCall || IsVectorCall || IsRegCall, PaddingType); return getIndirectResult(Ty, true, State); } if (const VectorType *VT = Ty->getAs()) { // On Windows, vectors are passed directly if registers are available, or // indirectly if not. This avoids the need to align argument memory. Pass // user-defined vector types larger than 512 bits indirectly for simplicity. if (IsWin32StructABI) { if (TI.Width <= 512 && State.FreeSSERegs > 0) { --State.FreeSSERegs; return ABIArgInfo::getDirectInReg(); } return getIndirectResult(Ty, /*ByVal=*/false, State); } // On Darwin, some vectors are passed in memory, we handle this by passing // it as an i8/i16/i32/i64. if (IsDarwinVectorABI) { if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) || (TI.Width == 64 && VT->getNumElements() == 1)) return ABIArgInfo::getDirect( llvm::IntegerType::get(getVMContext(), TI.Width)); } if (IsX86_MMXType(CGT.ConvertType(Ty))) return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64)); return ABIArgInfo::getDirect(); } if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); bool InReg = shouldPrimitiveUseInReg(Ty, State); if (isPromotableIntegerTypeForABI(Ty)) { if (InReg) return ABIArgInfo::getExtendInReg(Ty); return ABIArgInfo::getExtend(Ty); } if (const auto * EIT = Ty->getAs()) { if (EIT->getNumBits() <= 64) { if (InReg) return ABIArgInfo::getDirectInReg(); return ABIArgInfo::getDirect(); } return getIndirectResult(Ty, /*ByVal=*/false, State); } if (InReg) return ABIArgInfo::getDirectInReg(); return ABIArgInfo::getDirect(); } void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { CCState State(FI); if (IsMCUABI) State.FreeRegs = 3; else if (State.CC == llvm::CallingConv::X86_FastCall) { State.FreeRegs = 2; State.FreeSSERegs = 3; } else if (State.CC == llvm::CallingConv::X86_VectorCall) { State.FreeRegs = 2; State.FreeSSERegs = 6; } else if (FI.getHasRegParm()) State.FreeRegs = FI.getRegParm(); else if (State.CC == llvm::CallingConv::X86_RegCall) { State.FreeRegs = 5; State.FreeSSERegs = 8; } else if (IsWin32StructABI) { // Since MSVC 2015, the first three SSE vectors have been passed in // registers. The rest are passed indirectly. State.FreeRegs = DefaultNumRegisterParameters; State.FreeSSERegs = 3; } else State.FreeRegs = DefaultNumRegisterParameters; if (!::classifyReturnType(getCXXABI(), FI, *this)) { FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State); } else if (FI.getReturnInfo().isIndirect()) { // The C++ ABI is not aware of register usage, so we have to check if the // return value was sret and put it in a register ourselves if appropriate. if (State.FreeRegs) { --State.FreeRegs; // The sret parameter consumes a register. if (!IsMCUABI) FI.getReturnInfo().setInReg(true); } } // The chain argument effectively gives us another free register. if (FI.isChainCall()) ++State.FreeRegs; // For vectorcall, do a first pass over the arguments, assigning FP and vector // arguments to XMM registers as available. if (State.CC == llvm::CallingConv::X86_VectorCall) runVectorCallFirstPass(FI, State); bool UsedInAlloca = false; MutableArrayRef Args = FI.arguments(); for (int I = 0, E = Args.size(); I < E; ++I) { // Skip arguments that have already been assigned. 
if (State.IsPreassigned.test(I)) continue; Args[I].info = classifyArgumentType(Args[I].type, State); UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca); } // If we needed to use inalloca for any argument, do a second pass and rewrite // all the memory arguments to use inalloca. if (UsedInAlloca) rewriteWithInAlloca(FI); } void X86_32ABIInfo::addFieldToArgStruct(SmallVector &FrameFields, CharUnits &StackOffset, ABIArgInfo &Info, QualType Type) const { // Arguments are always 4-byte-aligned. CharUnits WordSize = CharUnits::fromQuantity(4); assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct"); // sret pointers and indirect things will require an extra pointer // indirection, unless they are byval. Most things are byval, and will not // require this indirection. bool IsIndirect = false; if (Info.isIndirect() && !Info.getIndirectByVal()) IsIndirect = true; Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect); llvm::Type *LLTy = CGT.ConvertTypeForMem(Type); if (IsIndirect) LLTy = LLTy->getPointerTo(0); FrameFields.push_back(LLTy); StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type); // Insert padding bytes to respect alignment. CharUnits FieldEnd = StackOffset; StackOffset = FieldEnd.alignTo(WordSize); if (StackOffset != FieldEnd) { CharUnits NumBytes = StackOffset - FieldEnd; llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext()); Ty = llvm::ArrayType::get(Ty, NumBytes.getQuantity()); FrameFields.push_back(Ty); } } static bool isArgInAlloca(const ABIArgInfo &Info) { // Leave ignored and inreg arguments alone. switch (Info.getKind()) { case ABIArgInfo::InAlloca: return true; case ABIArgInfo::Ignore: case ABIArgInfo::IndirectAliased: return false; case ABIArgInfo::Indirect: case ABIArgInfo::Direct: case ABIArgInfo::Extend: return !Info.getInReg(); case ABIArgInfo::Expand: case ABIArgInfo::CoerceAndExpand: // These are aggregate types which are never passed in registers when // inalloca is involved. return true; } llvm_unreachable("invalid enum"); } void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const { assert(IsWin32StructABI && "inalloca only supported on win32"); // Build a packed struct type for all of the arguments in memory. SmallVector FrameFields; // The stack alignment is always 4. CharUnits StackAlign = CharUnits::fromQuantity(4); CharUnits StackOffset; CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end(); // Put 'this' into the struct before 'sret', if necessary. bool IsThisCall = FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall; ABIArgInfo &Ret = FI.getReturnInfo(); if (Ret.isIndirect() && Ret.isSRetAfterThis() && !IsThisCall && isArgInAlloca(I->info)) { addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type); ++I; } // Put the sret parameter into the inalloca struct if it's in memory. if (Ret.isIndirect() && !Ret.getInReg()) { addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType()); // On Windows, the hidden sret parameter is always returned in eax. Ret.setInAllocaSRet(IsWin32StructABI); } // Skip the 'this' parameter in ecx. if (IsThisCall) ++I; // Put arguments passed in memory into the struct. 
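  // Illustration (hypothetical types): for a 32-bit MSVC-style call such as
  //   struct NonTrivial { NonTrivial(const NonTrivial &); int v; };
  //   void g(int a, NonTrivial b, int c);
  // every stack-passed argument is appended to one packed frame type, roughly
  //   <{ i32, %struct.NonTrivial, i32 }>,
  // and the call site materializes that frame with a single inalloca
  // allocation instead of separate byval copies.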
for (; I != E; ++I) { if (isArgInAlloca(I->info)) addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type); } FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields, /*isPacked=*/true), StackAlign); } Address X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { auto TypeInfo = getContext().getTypeInfoInChars(Ty); // x86-32 changes the alignment of certain arguments on the stack. // // Just messing with TypeInfo like this works because we never pass // anything indirectly. TypeInfo.second = CharUnits::fromQuantity( getTypeStackAlignInBytes(Ty, TypeInfo.second.getQuantity())); return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo, CharUnits::fromQuantity(4), /*AllowHigherAlign*/ true); } bool X86_32TargetCodeGenInfo::isStructReturnInRegABI( const llvm::Triple &Triple, const CodeGenOptions &Opts) { assert(Triple.getArch() == llvm::Triple::x86); switch (Opts.getStructReturnConvention()) { case CodeGenOptions::SRCK_Default: break; case CodeGenOptions::SRCK_OnStack: // -fpcc-struct-return return false; case CodeGenOptions::SRCK_InRegs: // -freg-struct-return return true; } if (Triple.isOSDarwin() || Triple.isOSIAMCU()) return true; switch (Triple.getOS()) { case llvm::Triple::DragonFly: case llvm::Triple::FreeBSD: case llvm::Triple::OpenBSD: case llvm::Triple::Win32: return true; default: return false; } } void X86_32TargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { if (GV->isDeclaration()) return; if (const FunctionDecl *FD = dyn_cast_or_null(D)) { if (FD->hasAttr()) { llvm::Function *Fn = cast(GV); Fn->addFnAttr("stackrealign"); } if (FD->hasAttr()) { llvm::Function *Fn = cast(GV); Fn->setCallingConv(llvm::CallingConv::X86_INTR); } } } bool X86_32TargetCodeGenInfo::initDwarfEHRegSizeTable( CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { CodeGen::CGBuilderTy &Builder = CGF.Builder; llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4); // 0-7 are the eight integer registers; the order is different // on Darwin (for EH), but the range is the same. // 8 is %eip. AssignToArrayRange(Builder, Address, Four8, 0, 8); if (CGF.CGM.getTarget().getTriple().isOSDarwin()) { // 12-16 are st(0..4). Not sure why we stop at 4. // These have size 16, which is sizeof(long double) on // platforms with 8-byte alignment for that type. llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16); AssignToArrayRange(Builder, Address, Sixteen8, 12, 16); } else { // 9 is %eflags, which doesn't get a size on Darwin for some // reason. Builder.CreateAlignedStore( Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address, 9), CharUnits::One()); // 11-16 are st(0..5). Not sure why we stop at 5. // These have size 12, which is sizeof(long double) on // platforms with 4-byte alignment for that type. llvm::Value *Twelve8 = llvm::ConstantInt::get(CGF.Int8Ty, 12); AssignToArrayRange(Builder, Address, Twelve8, 11, 16); } return false; } //===----------------------------------------------------------------------===// // X86-64 ABI Implementation //===----------------------------------------------------------------------===// namespace { /// The AVX ABI level for X86 targets. enum class X86AVXABILevel { None, AVX, AVX512 }; /// \p returns the size in bits of the largest (native) vector for \p AVXLevel. 
static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) { switch (AVXLevel) { case X86AVXABILevel::AVX512: return 512; case X86AVXABILevel::AVX: return 256; case X86AVXABILevel::None: return 128; } llvm_unreachable("Unknown AVXLevel"); } /// X86_64ABIInfo - The X86_64 ABI information. class X86_64ABIInfo : public SwiftABIInfo { enum Class { Integer = 0, SSE, SSEUp, X87, X87Up, ComplexX87, NoClass, Memory }; /// merge - Implement the X86_64 ABI merging algorithm. /// /// Merge an accumulating classification \arg Accum with a field /// classification \arg Field. /// /// \param Accum - The accumulating classification. This should /// always be either NoClass or the result of a previous merge /// call. In addition, this should never be Memory (the caller /// should just return Memory for the aggregate). static Class merge(Class Accum, Class Field); /// postMerge - Implement the X86_64 ABI post merging algorithm. /// /// Post merger cleanup, reduces a malformed Hi and Lo pair to /// final MEMORY or SSE classes when necessary. /// /// \param AggregateSize - The size of the current aggregate in /// the classification process. /// /// \param Lo - The classification for the parts of the type /// residing in the low word of the containing object. /// /// \param Hi - The classification for the parts of the type /// residing in the higher words of the containing object. /// void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const; /// classify - Determine the x86_64 register classes in which the /// given type T should be passed. /// /// \param Lo - The classification for the parts of the type /// residing in the low word of the containing object. /// /// \param Hi - The classification for the parts of the type /// residing in the high word of the containing object. /// /// \param OffsetBase - The bit offset of this type in the /// containing object. Some parameters are classified different /// depending on whether they straddle an eightbyte boundary. /// /// \param isNamedArg - Whether the argument in question is a "named" /// argument, as used in AMD64-ABI 3.5.7. /// /// If a word is unused its result will be NoClass; if a type should /// be passed in Memory then at least the classification of \arg Lo /// will be Memory. /// /// The \arg Lo class will be NoClass iff the argument is ignored. /// /// If the \arg Lo class is ComplexX87, then the \arg Hi class will /// also be ComplexX87. void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi, bool isNamedArg) const; llvm::Type *GetByteVectorType(QualType Ty) const; llvm::Type *GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, QualType SourceTy, unsigned SourceOffset) const; llvm::Type *GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset, QualType SourceTy, unsigned SourceOffset) const; /// getIndirectResult - Give a source type \arg Ty, return a suitable result /// such that the argument will be returned in memory. ABIArgInfo getIndirectReturnResult(QualType Ty) const; /// getIndirectResult - Give a source type \arg Ty, return a suitable result /// such that the argument will be passed in memory. /// /// \param freeIntRegs - The number of free integer registers remaining /// available. 
ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const; ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs, unsigned &neededInt, unsigned &neededSSE, bool isNamedArg) const; ABIArgInfo classifyRegCallStructType(QualType Ty, unsigned &NeededInt, unsigned &NeededSSE) const; ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt, unsigned &NeededSSE) const; bool IsIllegalVectorType(QualType Ty) const; /// The 0.98 ABI revision clarified a lot of ambiguities, /// unfortunately in ways that were not always consistent with /// certain previous compilers. In particular, platforms which /// required strict binary compatibility with older versions of GCC /// may need to exempt themselves. bool honorsRevision0_98() const { return !getTarget().getTriple().isOSDarwin(); } /// GCC classifies <1 x long long> as SSE but some platform ABIs choose to /// classify it as INTEGER (for compatibility with older clang compilers). bool classifyIntegerMMXAsSSE() const { // Clang <= 3.8 did not do this. if (getContext().getLangOpts().getClangABICompat() <= LangOptions::ClangABI::Ver3_8) return false; const llvm::Triple &Triple = getTarget().getTriple(); if (Triple.isOSDarwin() || Triple.getOS() == llvm::Triple::PS4) return false; if (Triple.isOSFreeBSD() && Triple.getOSMajorVersion() >= 10) return false; return true; } // GCC classifies vectors of __int128 as memory. bool passInt128VectorsInMem() const { // Clang <= 9.0 did not do this. if (getContext().getLangOpts().getClangABICompat() <= LangOptions::ClangABI::Ver9) return false; const llvm::Triple &T = getTarget().getTriple(); return T.isOSLinux() || T.isOSNetBSD(); } X86AVXABILevel AVXLevel; // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on // 64-bit hardware. bool Has64BitPointers; public: X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) : SwiftABIInfo(CGT), AVXLevel(AVXLevel), Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) { } bool isPassedUsingAVXType(QualType type) const { unsigned neededInt, neededSSE; // The freeIntRegs argument doesn't matter here. ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE, /*isNamedArg*/true); if (info.isDirect()) { llvm::Type *ty = info.getCoerceToType(); if (llvm::VectorType *vectorTy = dyn_cast_or_null(ty)) return vectorTy->getPrimitiveSizeInBits().getFixedSize() > 128; } return false; } void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; bool has64BitPointers() const { return Has64BitPointers; } bool shouldPassIndirectlyForSwift(ArrayRef scalars, bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } bool isSwiftErrorInRegister() const override { return true; } }; /// WinX86_64ABIInfo - The Windows X86_64 ABI information. class WinX86_64ABIInfo : public SwiftABIInfo { public: WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) : SwiftABIInfo(CGT), AVXLevel(AVXLevel), IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {} void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; bool isHomogeneousAggregateBaseType(QualType Ty) const override { // FIXME: Assumes vectorcall is in use. 
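  // (Illustration, hypothetical type: a homogeneous vector aggregate such as
  //    struct HVA2 { __m128 a, b; };
  //  has identical vector elements and no more than four members, so this
  //  hook and isHomogeneousAggregateSmallEnough both accept it and vectorcall
  //  can pass it in SSE registers.)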
    return isX86VectorTypeForVectorCall(getContext(), Ty);
  }

  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t NumMembers) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorCallAggregateSmallEnough(NumMembers);
  }

  bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type *> scalars,
                                    bool asReturnValue) const override {
    return occupiesMoreThan(CGT, scalars, /*total*/ 4);
  }

  bool isSwiftErrorInRegister() const override {
    return true;
  }

private:
  ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
                      bool IsVectorCall, bool IsRegCall) const;
  ABIArgInfo reclassifyHvaArgType(QualType Ty, unsigned &FreeSSERegs,
                                  const ABIArgInfo &current) const;
  void computeVectorCallArgs(CGFunctionInfo &FI, unsigned FreeSSERegs,
                             bool IsVectorCall, bool IsRegCall) const;

  X86AVXABILevel AVXLevel;

  bool IsMingw64;
};

class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
      : TargetCodeGenInfo(std::make_unique<X86_64ABIInfo>(CGT, AVXLevel)) {}

  const X86_64ABIInfo &getABIInfo() const {
    return static_cast<const X86_64ABIInfo &>(TargetCodeGenInfo::getABIInfo());
  }

  /// Disable tail call on x86-64. The epilogue code before the tail jump blocks
  /// autoreleaseRV/retainRV and autoreleaseRV/unsafeClaimRV optimizations.
  bool markARCOptimizedReturnCallsAsNoTail() const override { return true; }

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
    return 7;
  }

  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                               llvm::Value *Address) const override {
    llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);

    // 0-15 are the 16 integer registers.
    // 16 is %rip.
    AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
    return false;
  }

  llvm::Type *adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                  StringRef Constraint,
                                  llvm::Type *Ty) const override {
    return X86AdjustInlineAsmType(CGF, Constraint, Ty);
  }

  bool isNoProtoCallVariadic(const CallArgList &args,
                             const FunctionNoProtoType *fnType) const override {
    // The default CC on x86-64 sets %al to the number of SSE
    // registers used, and GCC sets this when calling an unprototyped
    // function, so we override the default behavior. However, don't do
    // that when AVX types are involved: the ABI explicitly states it is
    // undefined, and it doesn't work in practice because of how the ABI
    // defines varargs anyway.
if (fnType->getCallConv() == CC_C) { bool HasAVXType = false; for (CallArgList::const_iterator it = args.begin(), ie = args.end(); it != ie; ++it) { if (getABIInfo().isPassedUsingAVXType(it->Ty)) { HasAVXType = true; break; } } if (!HasAVXType) return true; } return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType); } llvm::Constant * getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override { unsigned Sig = (0xeb << 0) | // jmp rel8 (0x06 << 8) | // .+0x08 ('v' << 16) | ('2' << 24); return llvm::ConstantInt::get(CGM.Int32Ty, Sig); } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { if (GV->isDeclaration()) return; if (const FunctionDecl *FD = dyn_cast_or_null(D)) { if (FD->hasAttr()) { llvm::Function *Fn = cast(GV); Fn->addFnAttr("stackrealign"); } if (FD->hasAttr()) { llvm::Function *Fn = cast(GV); Fn->setCallingConv(llvm::CallingConv::X86_INTR); } } } void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller, const FunctionDecl *Callee, const CallArgList &Args) const override; }; static void initFeatureMaps(const ASTContext &Ctx, llvm::StringMap &CallerMap, const FunctionDecl *Caller, llvm::StringMap &CalleeMap, const FunctionDecl *Callee) { if (CalleeMap.empty() && CallerMap.empty()) { // The caller is potentially nullptr in the case where the call isn't in a // function. In this case, the getFunctionFeatureMap ensures we just get // the TU level setting (since it cannot be modified by 'target'.. Ctx.getFunctionFeatureMap(CallerMap, Caller); Ctx.getFunctionFeatureMap(CalleeMap, Callee); } } static bool checkAVXParamFeature(DiagnosticsEngine &Diag, SourceLocation CallLoc, const llvm::StringMap &CallerMap, const llvm::StringMap &CalleeMap, QualType Ty, StringRef Feature, bool IsArgument) { bool CallerHasFeat = CallerMap.lookup(Feature); bool CalleeHasFeat = CalleeMap.lookup(Feature); if (!CallerHasFeat && !CalleeHasFeat) return Diag.Report(CallLoc, diag::warn_avx_calling_convention) << IsArgument << Ty << Feature; // Mixing calling conventions here is very clearly an error. if (!CallerHasFeat || !CalleeHasFeat) return Diag.Report(CallLoc, diag::err_avx_calling_convention) << IsArgument << Ty << Feature; // Else, both caller and callee have the required feature, so there is no need // to diagnose. return false; } static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx, SourceLocation CallLoc, const llvm::StringMap &CallerMap, const llvm::StringMap &CalleeMap, QualType Ty, bool IsArgument) { uint64_t Size = Ctx.getTypeSize(Ty); if (Size > 256) return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx512f", IsArgument); if (Size > 128) return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx", IsArgument); return false; } void X86_64TargetCodeGenInfo::checkFunctionCallABI( CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller, const FunctionDecl *Callee, const CallArgList &Args) const { llvm::StringMap CallerMap; llvm::StringMap CalleeMap; unsigned ArgIndex = 0; // We need to loop through the actual call arguments rather than the the // function's parameters, in case this variadic. for (const CallArg &Arg : Args) { // The "avx" feature changes how vectors >128 in size are passed. "avx512f" // additionally changes how vectors >256 in size are passed. Like GCC, we // warn when a function is called with an argument where this will change. 
// Unlike GCC, we also error when it is an obvious ABI mismatch, that is, // the caller and callee features are mismatched. // Unfortunately, we cannot do this diagnostic in SEMA, since the callee can // change its ABI with attribute-target after this call. if (Arg.getType()->isVectorType() && CGM.getContext().getTypeSize(Arg.getType()) > 128) { initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee); QualType Ty = Arg.getType(); // The CallArg seems to have desugared the type already, so for clearer // diagnostics, replace it with the type in the FunctionDecl if possible. if (ArgIndex < Callee->getNumParams()) Ty = Callee->getParamDecl(ArgIndex)->getType(); if (checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap, CalleeMap, Ty, /*IsArgument*/ true)) return; } ++ArgIndex; } // Check return always, as we don't have a good way of knowing in codegen // whether this value is used, tail-called, etc. if (Callee->getReturnType()->isVectorType() && CGM.getContext().getTypeSize(Callee->getReturnType()) > 128) { initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee); checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap, CalleeMap, Callee->getReturnType(), /*IsArgument*/ false); } } static std::string qualifyWindowsLibrary(llvm::StringRef Lib) { // If the argument does not end in .lib, automatically add the suffix. // If the argument contains a space, enclose it in quotes. // This matches the behavior of MSVC. bool Quote = (Lib.find(" ") != StringRef::npos); std::string ArgStr = Quote ? "\"" : ""; ArgStr += Lib; if (!Lib.endswith_lower(".lib") && !Lib.endswith_lower(".a")) ArgStr += ".lib"; ArgStr += Quote ? "\"" : ""; return ArgStr; } class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo { public: WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, unsigned NumRegisterParameters) : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI, NumRegisterParameters, false) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { Opt = "/DEFAULTLIB:"; Opt += qualifyWindowsLibrary(Lib); } void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, llvm::SmallString<32> &Opt) const override { Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; } }; static void addStackProbeTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) { if (llvm::Function *Fn = dyn_cast_or_null(GV)) { if (CGM.getCodeGenOpts().StackProbeSize != 4096) Fn->addFnAttr("stack-probe-size", llvm::utostr(CGM.getCodeGenOpts().StackProbeSize)); if (CGM.getCodeGenOpts().NoStackArgProbe) Fn->addFnAttr("no-stack-arg-probe"); } } void WinX86_32TargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); if (GV->isDeclaration()) return; addStackProbeTargetAttributes(D, GV, CGM); } class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo { public: WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) : TargetCodeGenInfo(std::make_unique(CGT, AVXLevel)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override 
{ return 7; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override { llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8); // 0-15 are the 16 integer registers. // 16 is %rip. AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16); return false; } void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { Opt = "/DEFAULTLIB:"; Opt += qualifyWindowsLibrary(Lib); } void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, llvm::SmallString<32> &Opt) const override { Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; } }; void WinX86_64TargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); if (GV->isDeclaration()) return; if (const FunctionDecl *FD = dyn_cast_or_null(D)) { if (FD->hasAttr()) { llvm::Function *Fn = cast(GV); Fn->addFnAttr("stackrealign"); } if (FD->hasAttr()) { llvm::Function *Fn = cast(GV); Fn->setCallingConv(llvm::CallingConv::X86_INTR); } } addStackProbeTargetAttributes(D, GV, CGM); } } void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const { // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done: // // (a) If one of the classes is Memory, the whole argument is passed in // memory. // // (b) If X87UP is not preceded by X87, the whole argument is passed in // memory. // // (c) If the size of the aggregate exceeds two eightbytes and the first // eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole // argument is passed in memory. NOTE: This is necessary to keep the // ABI working for processors that don't support the __m256 type. // // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE. // // Some of these are enforced by the merging logic. Others can arise // only with unions; for example: // union { _Complex double; unsigned; } // // Note that clauses (b) and (c) were added in 0.98. // if (Hi == Memory) Lo = Memory; if (Hi == X87Up && Lo != X87 && honorsRevision0_98()) Lo = Memory; if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp)) Lo = Memory; if (Hi == SSEUp && Lo != SSE) Hi = SSE; } X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) { // AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is // classified recursively so that always two fields are // considered. The resulting class is calculated according to // the classes of the fields in the eightbyte: // // (a) If both classes are equal, this is the resulting class. // // (b) If one of the classes is NO_CLASS, the resulting class is // the other class. // // (c) If one of the classes is MEMORY, the result is the MEMORY // class. // // (d) If one of the classes is INTEGER, the result is the // INTEGER. // // (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, // MEMORY is used as class. // // (f) Otherwise class SSE is used. // Accum should never be memory (we should have returned) or // ComplexX87 (because this cannot be passed in a structure). 
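  // Worked example of the rules above (hypothetical type): for
  //   struct S { int i; float f; };
  // both fields share one eightbyte; the int classifies as INTEGER, the float
  // as SSE, and rule (d) merges them to INTEGER, so the whole struct travels
  // in a single general-purpose register.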
assert((Accum != Memory && Accum != ComplexX87) && "Invalid accumulated classification during merge."); if (Accum == Field || Field == NoClass) return Accum; if (Field == Memory) return Memory; if (Accum == NoClass) return Field; if (Accum == Integer || Field == Integer) return Integer; if (Field == X87 || Field == X87Up || Field == ComplexX87 || Accum == X87 || Accum == X87Up) return Memory; return SSE; } void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo, Class &Hi, bool isNamedArg) const { // FIXME: This code can be simplified by introducing a simple value class for // Class pairs with appropriate constructor methods for the various // situations. // FIXME: Some of the split computations are wrong; unaligned vectors // shouldn't be passed in registers for example, so there is no chance they // can straddle an eightbyte. Verify & simplify. Lo = Hi = NoClass; Class &Current = OffsetBase < 64 ? Lo : Hi; Current = Memory; if (const BuiltinType *BT = Ty->getAs()) { BuiltinType::Kind k = BT->getKind(); if (k == BuiltinType::Void) { Current = NoClass; } else if (k == BuiltinType::Int128 || k == BuiltinType::UInt128) { Lo = Integer; Hi = Integer; } else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) { Current = Integer; } else if (k == BuiltinType::Float || k == BuiltinType::Double) { Current = SSE; } else if (k == BuiltinType::LongDouble) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); if (LDF == &llvm::APFloat::IEEEquad()) { Lo = SSE; Hi = SSEUp; } else if (LDF == &llvm::APFloat::x87DoubleExtended()) { Lo = X87; Hi = X87Up; } else if (LDF == &llvm::APFloat::IEEEdouble()) { Current = SSE; } else llvm_unreachable("unexpected long double representation!"); } // FIXME: _Decimal32 and _Decimal64 are SSE. // FIXME: _float128 and _Decimal128 are (SSE, SSEUp). return; } if (const EnumType *ET = Ty->getAs()) { // Classify the underlying integer type. classify(ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi, isNamedArg); return; } if (Ty->hasPointerRepresentation()) { Current = Integer; return; } if (Ty->isMemberPointerType()) { if (Ty->isMemberFunctionPointerType()) { if (Has64BitPointers) { // If Has64BitPointers, this is an {i64, i64}, so classify both // Lo and Hi now. Lo = Hi = Integer; } else { // Otherwise, with 32-bit pointers, this is an {i32, i32}. If that // straddles an eightbyte boundary, Hi should be classified as well. uint64_t EB_FuncPtr = (OffsetBase) / 64; uint64_t EB_ThisAdj = (OffsetBase + 64 - 1) / 64; if (EB_FuncPtr != EB_ThisAdj) { Lo = Hi = Integer; } else { Current = Integer; } } } else { Current = Integer; } return; } if (const VectorType *VT = Ty->getAs()) { uint64_t Size = getContext().getTypeSize(VT); if (Size == 1 || Size == 8 || Size == 16 || Size == 32) { // gcc passes the following as integer: // 4 bytes - <4 x char>, <2 x short>, <1 x int>, <1 x float> // 2 bytes - <2 x char>, <1 x short> // 1 byte - <1 x char> Current = Integer; // If this type crosses an eightbyte boundary, it should be // split. uint64_t EB_Lo = (OffsetBase) / 64; uint64_t EB_Hi = (OffsetBase + Size - 1) / 64; if (EB_Lo != EB_Hi) Hi = Lo; } else if (Size == 64) { QualType ElementType = VT->getElementType(); // gcc passes <1 x double> in memory. :( if (ElementType->isSpecificBuiltinType(BuiltinType::Double)) return; // gcc passes <1 x long long> as SSE but clang used to unconditionally // pass them as integer. For platforms where clang is the de facto // platform compiler, we must continue to use integer. 
if (!classifyIntegerMMXAsSSE() && (ElementType->isSpecificBuiltinType(BuiltinType::LongLong) || ElementType->isSpecificBuiltinType(BuiltinType::ULongLong) || ElementType->isSpecificBuiltinType(BuiltinType::Long) || ElementType->isSpecificBuiltinType(BuiltinType::ULong))) Current = Integer; else Current = SSE; // If this type crosses an eightbyte boundary, it should be // split. if (OffsetBase && OffsetBase != 64) Hi = Lo; } else if (Size == 128 || (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) { QualType ElementType = VT->getElementType(); // gcc passes 256 and 512 bit vectors in memory. :( if (passInt128VectorsInMem() && Size != 128 && (ElementType->isSpecificBuiltinType(BuiltinType::Int128) || ElementType->isSpecificBuiltinType(BuiltinType::UInt128))) return; // Arguments of 256-bits are split into four eightbyte chunks. The // least significant one belongs to class SSE and all the others to class // SSEUP. The original Lo and Hi design considers that types can't be // greater than 128-bits, so a 64-bit split in Hi and Lo makes sense. // This design isn't correct for 256-bits, but since there're no cases // where the upper parts would need to be inspected, avoid adding // complexity and just consider Hi to match the 64-256 part. // // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in // registers if they are "named", i.e. not part of the "..." of a // variadic function. // // Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are // split into eight eightbyte chunks, one SSE and seven SSEUP. Lo = SSE; Hi = SSEUp; } return; } if (const ComplexType *CT = Ty->getAs()) { QualType ET = getContext().getCanonicalType(CT->getElementType()); uint64_t Size = getContext().getTypeSize(Ty); if (ET->isIntegralOrEnumerationType()) { if (Size <= 64) Current = Integer; else if (Size <= 128) Lo = Hi = Integer; } else if (ET == getContext().FloatTy) { Current = SSE; } else if (ET == getContext().DoubleTy) { Lo = Hi = SSE; } else if (ET == getContext().LongDoubleTy) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); if (LDF == &llvm::APFloat::IEEEquad()) Current = Memory; else if (LDF == &llvm::APFloat::x87DoubleExtended()) Current = ComplexX87; else if (LDF == &llvm::APFloat::IEEEdouble()) Lo = Hi = SSE; else llvm_unreachable("unexpected long double representation!"); } // If this complex type crosses an eightbyte boundary then it // should be split. uint64_t EB_Real = (OffsetBase) / 64; uint64_t EB_Imag = (OffsetBase + getContext().getTypeSize(ET)) / 64; if (Hi == NoClass && EB_Real != EB_Imag) Hi = Lo; return; } if (const auto *EITy = Ty->getAs()) { if (EITy->getNumBits() <= 64) Current = Integer; else if (EITy->getNumBits() <= 128) Lo = Hi = Integer; // Larger values need to get passed in memory. return; } if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { // Arrays are treated like structures. uint64_t Size = getContext().getTypeSize(Ty); // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger // than eight eightbytes, ..., it has class MEMORY. if (Size > 512) return; // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned // fields, it has class MEMORY. // // Only need to check alignment of array base. if (OffsetBase % getContext().getTypeAlign(AT->getElementType())) return; // Otherwise implement simplified merge. We could be smarter about // this, but it isn't worth it and would be harder to verify. 
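  // For instance (hypothetical type), a constant array member such as
  //   struct D2 { double d[2]; };
  // classifies each element as SSE, so the merge below yields Lo = SSE and
  // Hi = SSE and the 16-byte aggregate is passed in two SSE registers.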
    Current = NoClass;
    uint64_t EltSize = getContext().getTypeSize(AT->getElementType());
    uint64_t ArraySize = AT->getSize().getZExtValue();

    // The only case a 256-bit wide vector could be used is when the array
    // contains a single 256-bit element. Since Lo and Hi logic isn't extended
    // to work for sizes wider than 128, early check and fallback to memory.
    //
    if (Size > 128 &&
        (Size != EltSize || Size > getNativeVectorSizeForAVXABI(AVXLevel)))
      return;

    for (uint64_t i = 0, Offset = OffsetBase; i < ArraySize;
         ++i, Offset += EltSize) {
      Class FieldLo, FieldHi;
      classify(AT->getElementType(), Offset, FieldLo, FieldHi, isNamedArg);
      Lo = merge(Lo, FieldLo);
      Hi = merge(Hi, FieldHi);
      if (Lo == Memory || Hi == Memory)
        break;
    }

    postMerge(Size, Lo, Hi);
    assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification.");
    return;
  }

  if (const RecordType *RT = Ty->getAs<RecordType>()) {
    uint64_t Size = getContext().getTypeSize(Ty);

    // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
    // than eight eightbytes, ..., it has class MEMORY.
    if (Size > 512)
      return;

    // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial
    // copy constructor or a non-trivial destructor, it is passed by invisible
    // reference.
    if (getRecordArgABI(RT, getCXXABI()))
      return;

    const RecordDecl *RD = RT->getDecl();

    // Assume variable sized types are passed in memory.
    if (RD->hasFlexibleArrayMember())
      return;

    const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);

    // Reset Lo class, this will be recomputed.
    Current = NoClass;

    // If this is a C++ record, classify the bases first.
    if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
      for (const auto &I : CXXRD->bases()) {
        assert(!I.isVirtual() && !I.getType()->isDependentType() &&
               "Unexpected base class!");
        const auto *Base =
            cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());

        // Classify this field.
        //
        // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate exceeds a
        // single eightbyte, each is classified separately. Each eightbyte gets
        // initialized to class NO_CLASS.
        Class FieldLo, FieldHi;
        uint64_t Offset =
            OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base));
        classify(I.getType(), Offset, FieldLo, FieldHi, isNamedArg);
        Lo = merge(Lo, FieldLo);
        Hi = merge(Hi, FieldHi);
        if (Lo == Memory || Hi == Memory) {
          postMerge(Size, Lo, Hi);
          return;
        }
      }
    }

    // Classify the fields one at a time, merging the results.
    unsigned idx = 0;
    for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
         i != e; ++i, ++idx) {
      uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx);
      bool BitField = i->isBitField();

      // Ignore padding bit-fields.
      if (BitField && i->isUnnamedBitfield())
        continue;

      // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than
      // four eightbytes, or it contains unaligned fields, it has class MEMORY.
      //
      // The only case a 256-bit wide vector could be used is when the struct
      // contains a single 256-bit element. Since Lo and Hi logic isn't
      // extended to work for sizes wider than 128, early check and fallback
      // to memory.
      //
      if (Size > 128 &&
          (Size != getContext().getTypeSize(i->getType()) ||
           Size > getNativeVectorSizeForAVXABI(AVXLevel))) {
        Lo = Memory;
        postMerge(Size, Lo, Hi);
        return;
      }
      // Note, skip this test for bit-fields, see below.
      if (!BitField && Offset % getContext().getTypeAlign(i->getType())) {
        Lo = Memory;
        postMerge(Size, Lo, Hi);
        return;
      }

      // Classify this field.
      //
      // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate
      // exceeds a single eightbyte, each is classified
      // separately. Each eightbyte gets initialized to class
      // NO_CLASS.
Class FieldLo, FieldHi; // Bit-fields require special handling, they do not force the // structure to be passed in memory even if unaligned, and // therefore they can straddle an eightbyte. if (BitField) { assert(!i->isUnnamedBitfield()); uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); uint64_t Size = i->getBitWidthValue(getContext()); uint64_t EB_Lo = Offset / 64; uint64_t EB_Hi = (Offset + Size - 1) / 64; if (EB_Lo) { assert(EB_Hi == EB_Lo && "Invalid classification, type > 16 bytes."); FieldLo = NoClass; FieldHi = Integer; } else { FieldLo = Integer; FieldHi = EB_Hi ? Integer : NoClass; } } else classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg); Lo = merge(Lo, FieldLo); Hi = merge(Hi, FieldHi); if (Lo == Memory || Hi == Memory) break; } postMerge(Size, Lo, Hi); } } ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const { // If this is a scalar LLVM value then assume LLVM will pass it in the right // place naturally. if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); if (Ty->isExtIntType()) return getNaturalAlignIndirect(Ty); return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) : ABIArgInfo::getDirect()); } return getNaturalAlignIndirect(Ty); } bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const { if (const VectorType *VecTy = Ty->getAs()) { uint64_t Size = getContext().getTypeSize(VecTy); unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel); if (Size <= 64 || Size > LargestVector) return true; QualType EltTy = VecTy->getElementType(); if (passInt128VectorsInMem() && (EltTy->isSpecificBuiltinType(BuiltinType::Int128) || EltTy->isSpecificBuiltinType(BuiltinType::UInt128))) return true; } return false; } ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty, unsigned freeIntRegs) const { // If this is a scalar LLVM value then assume LLVM will pass it in the right // place naturally. // // This assumption is optimistic, as there could be free registers available // when we need to pass this argument in memory, and LLVM could try to pass // the argument in the free register. This does not seem to happen currently, // but this code would be much safer if we could mark the argument with // 'onstack'. See PR12193. if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty) && !Ty->isExtIntType()) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) : ABIArgInfo::getDirect()); } if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); // Compute the byval alignment. We specify the alignment of the byval in all // cases so that the mid-level optimizer knows the alignment of the byval. unsigned Align = std::max(getContext().getTypeAlign(Ty) / 8, 8U); // Attempt to avoid passing indirect results using byval when possible. This // is important for good codegen. // // We do this by coercing the value into a scalar type which the backend can // handle naturally (i.e., without using byval). // // For simplicity, we currently only do this when we have exhausted all of the // free integer registers. Doing this when there are free integer registers // would require more care, as we would have to ensure that the coerced value // did not claim the unused register. 
That would require either reording the // arguments to the function (so that any subsequent inreg values came first), // or only doing this optimization when there were no following arguments that // might be inreg. // // We currently expect it to be rare (particularly in well written code) for // arguments to be passed on the stack when there are still free integer // registers available (this would typically imply large structs being passed // by value), so this seems like a fair tradeoff for now. // // We can revisit this if the backend grows support for 'onstack' parameter // attributes. See PR12193. if (freeIntRegs == 0) { uint64_t Size = getContext().getTypeSize(Ty); // If this type fits in an eightbyte, coerce it into the matching integral // type, which will end up on the stack (with alignment 8). if (Align == 8 && Size <= 64) return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); } return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align)); } /// The ABI specifies that a value should be passed in a full vector XMM/YMM /// register. Pick an LLVM IR type that will be passed as a vector register. llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const { // Wrapper structs/arrays that only contain vectors are passed just like // vectors; strip them off if present. if (const Type *InnerTy = isSingleElementStruct(Ty, getContext())) Ty = QualType(InnerTy, 0); llvm::Type *IRType = CGT.ConvertType(Ty); if (isa(IRType)) { // Don't pass vXi128 vectors in their native type, the backend can't // legalize them. if (passInt128VectorsInMem() && cast(IRType)->getElementType()->isIntegerTy(128)) { // Use a vXi64 vector. uint64_t Size = getContext().getTypeSize(Ty); return llvm::FixedVectorType::get(llvm::Type::getInt64Ty(getVMContext()), Size / 64); } return IRType; } if (IRType->getTypeID() == llvm::Type::FP128TyID) return IRType; // We couldn't find the preferred IR vector type for 'Ty'. uint64_t Size = getContext().getTypeSize(Ty); assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!"); // Return a LLVM IR vector type based on the size of 'Ty'. return llvm::FixedVectorType::get(llvm::Type::getDoubleTy(getVMContext()), Size / 64); } /// BitsContainNoUserData - Return true if the specified [start,end) bit range /// is known to either be off the end of the specified type or being in /// alignment padding. The user type specified is known to be at most 128 bits /// in size, and have passed through X86_64ABIInfo::classify with a successful /// classification that put one of the two halves in the INTEGER class. /// /// It is conservatively correct to return false. static bool BitsContainNoUserData(QualType Ty, unsigned StartBit, unsigned EndBit, ASTContext &Context) { // If the bytes being queried are off the end of the type, there is no user // data hiding here. This handles analysis of builtins, vectors and other // types that don't contain interesting padding. unsigned TySize = (unsigned)Context.getTypeSize(Ty); if (TySize <= StartBit) return true; if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) { unsigned EltSize = (unsigned)Context.getTypeSize(AT->getElementType()); unsigned NumElts = (unsigned)AT->getSize().getZExtValue(); // Check each element to see if the element overlaps with the queried range. for (unsigned i = 0; i != NumElts; ++i) { // If the element is after the span we care about, then we're done.. unsigned EltOffset = i*EltSize; if (EltOffset >= EndBit) break; unsigned EltStart = EltOffset < StartBit ? 
StartBit-EltOffset :0; if (!BitsContainNoUserData(AT->getElementType(), EltStart, EndBit-EltOffset, Context)) return false; } // If it overlaps no elements, then it is safe to process as padding. return true; } if (const RecordType *RT = Ty->getAs()) { const RecordDecl *RD = RT->getDecl(); const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { for (const auto &I : CXXRD->bases()) { assert(!I.isVirtual() && !I.getType()->isDependentType() && "Unexpected base class!"); const auto *Base = cast(I.getType()->castAs()->getDecl()); // If the base is after the span we care about, ignore it. unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base)); if (BaseOffset >= EndBit) continue; unsigned BaseStart = BaseOffset < StartBit ? StartBit-BaseOffset :0; if (!BitsContainNoUserData(I.getType(), BaseStart, EndBit-BaseOffset, Context)) return false; } } // Verify that no field has data that overlaps the region of interest. Yes // this could be sped up a lot by being smarter about queried fields, // however we're only looking at structs up to 16 bytes, so we don't care // much. unsigned idx = 0; for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { unsigned FieldOffset = (unsigned)Layout.getFieldOffset(idx); // If we found a field after the region we care about, then we're done. if (FieldOffset >= EndBit) break; unsigned FieldStart = FieldOffset < StartBit ? StartBit-FieldOffset :0; if (!BitsContainNoUserData(i->getType(), FieldStart, EndBit-FieldOffset, Context)) return false; } // If nothing in this record overlapped the area of interest, then we're // clean. return true; } return false; } /// ContainsFloatAtOffset - Return true if the specified LLVM IR type has a /// float member at the specified offset. For example, {int,{float}} has a /// float at offset 4. It is conservatively correct for this routine to return /// false. static bool ContainsFloatAtOffset(llvm::Type *IRType, unsigned IROffset, const llvm::DataLayout &TD) { // Base case if we find a float. if (IROffset == 0 && IRType->isFloatTy()) return true; // If this is a struct, recurse into the field at the specified offset. if (llvm::StructType *STy = dyn_cast(IRType)) { const llvm::StructLayout *SL = TD.getStructLayout(STy); unsigned Elt = SL->getElementContainingOffset(IROffset); IROffset -= SL->getElementOffset(Elt); return ContainsFloatAtOffset(STy->getElementType(Elt), IROffset, TD); } // If this is an array, recurse into the field at the specified offset. if (llvm::ArrayType *ATy = dyn_cast(IRType)) { llvm::Type *EltTy = ATy->getElementType(); unsigned EltSize = TD.getTypeAllocSize(EltTy); IROffset -= IROffset/EltSize*EltSize; return ContainsFloatAtOffset(EltTy, IROffset, TD); } return false; } /// GetSSETypeAtOffset - Return a type that will be passed by the backend in the /// low 8 bytes of an XMM register, corresponding to the SSE class. llvm::Type *X86_64ABIInfo:: GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, QualType SourceTy, unsigned SourceOffset) const { // The only three choices we have are either double, <2 x float>, or float. We // pass as float if the last 4 bytes is just padding. This happens for // structs that contain 3 floats. 
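  // Concretely (hypothetical type): for
  //   struct V3 { float x, y, z; };
  // the first eightbyte is lowered as <2 x float> (x and y) and the second as
  // a lone float, because the four bytes after 'z' are only tail padding,
  // giving a coerced IR type of roughly { <2 x float>, float }.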
if (BitsContainNoUserData(SourceTy, SourceOffset*8+32, SourceOffset*8+64, getContext())) return llvm::Type::getFloatTy(getVMContext()); // We want to pass as <2 x float> if the LLVM IR type contains a float at // offset+0 and offset+4. Walk the LLVM IR type to find out if this is the // case. if (ContainsFloatAtOffset(IRType, IROffset, getDataLayout()) && ContainsFloatAtOffset(IRType, IROffset+4, getDataLayout())) return llvm::FixedVectorType::get(llvm::Type::getFloatTy(getVMContext()), 2); return llvm::Type::getDoubleTy(getVMContext()); } /// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in /// an 8-byte GPR. This means that we either have a scalar or we are talking /// about the high or low part of an up-to-16-byte struct. This routine picks /// the best LLVM IR type to represent this, which may be i64 or may be anything /// else that the backend will pass in a GPR that works better (e.g. i8, %foo*, /// etc). /// /// PrefType is an LLVM IR type that corresponds to (part of) the IR type for /// the source type. IROffset is an offset in bytes into the LLVM IR type that /// the 8-byte value references. PrefType may be null. /// /// SourceTy is the source-level type for the entire argument. SourceOffset is /// an offset into this that we're processing (which is always either 0 or 8). /// llvm::Type *X86_64ABIInfo:: GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset, QualType SourceTy, unsigned SourceOffset) const { // If we're dealing with an un-offset LLVM IR type, then it means that we're // returning an 8-byte unit starting with it. See if we can safely use it. if (IROffset == 0) { // Pointers and int64's always fill the 8-byte unit. if ((isa(IRType) && Has64BitPointers) || IRType->isIntegerTy(64)) return IRType; // If we have a 1/2/4-byte integer, we can use it only if the rest of the // goodness in the source type is just tail padding. This is allowed to // kick in for struct {double,int} on the int, but not on // struct{double,int,int} because we wouldn't return the second int. We // have to do this analysis on the source type because we can't depend on // unions being lowered a specific way etc. if (IRType->isIntegerTy(8) || IRType->isIntegerTy(16) || IRType->isIntegerTy(32) || (isa(IRType) && !Has64BitPointers)) { unsigned BitWidth = isa(IRType) ? 32 : cast(IRType)->getBitWidth(); if (BitsContainNoUserData(SourceTy, SourceOffset*8+BitWidth, SourceOffset*8+64, getContext())) return IRType; } } if (llvm::StructType *STy = dyn_cast(IRType)) { // If this is a struct, recurse into the field at the specified offset. const llvm::StructLayout *SL = getDataLayout().getStructLayout(STy); if (IROffset < SL->getSizeInBytes()) { unsigned FieldIdx = SL->getElementContainingOffset(IROffset); IROffset -= SL->getElementOffset(FieldIdx); return GetINTEGERTypeAtOffset(STy->getElementType(FieldIdx), IROffset, SourceTy, SourceOffset); } } if (llvm::ArrayType *ATy = dyn_cast(IRType)) { llvm::Type *EltTy = ATy->getElementType(); unsigned EltSize = getDataLayout().getTypeAllocSize(EltTy); unsigned EltOffset = IROffset/EltSize*EltSize; return GetINTEGERTypeAtOffset(EltTy, IROffset-EltOffset, SourceTy, SourceOffset); } // Okay, we don't have any better idea of what to pass, so we pass this in an // integer register that isn't too big to fit the rest of the struct. 
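  // Example (hypothetical packed type): for the second eightbyte of a 10-byte
  //   struct __attribute__((packed)) Q { long l; char c[2]; };
  // only two bytes of data remain past SourceOffset 8, so the fallback below
  // returns an i16 rather than a full i64 and never reads past the end of the
  // object.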
unsigned TySizeInBytes = (unsigned)getContext().getTypeSizeInChars(SourceTy).getQuantity(); assert(TySizeInBytes != SourceOffset && "Empty field?"); // It is always safe to classify this as an integer type up to i64 that // isn't larger than the structure. return llvm::IntegerType::get(getVMContext(), std::min(TySizeInBytes-SourceOffset, 8U)*8); } /// GetX86_64ByValArgumentPair - Given a high and low type that can ideally /// be used as elements of a two register pair to pass or return, return a /// first class aggregate to represent them. For example, if the low part of /// a by-value argument should be passed as i32* and the high part as float, /// return {i32*, float}. static llvm::Type * GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi, const llvm::DataLayout &TD) { // In order to correctly satisfy the ABI, we need to the high part to start // at offset 8. If the high and low parts we inferred are both 4-byte types // (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have // the second element at offset 8. Check for this: unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo); unsigned HiAlign = TD.getABITypeAlignment(Hi); unsigned HiStart = llvm::alignTo(LoSize, HiAlign); assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!"); // To handle this, we have to increase the size of the low part so that the // second element will start at an 8 byte offset. We can't increase the size // of the second element because it might make us access off the end of the // struct. if (HiStart != 8) { // There are usually two sorts of types the ABI generation code can produce // for the low part of a pair that aren't 8 bytes in size: float or // i8/i16/i32. This can also include pointers when they are 32-bit (X32 and // NaCl). // Promote these to a larger type. if (Lo->isFloatTy()) Lo = llvm::Type::getDoubleTy(Lo->getContext()); else { assert((Lo->isIntegerTy() || Lo->isPointerTy()) && "Invalid/unknown lo type"); Lo = llvm::Type::getInt64Ty(Lo->getContext()); } } llvm::StructType *Result = llvm::StructType::get(Lo, Hi); // Verify that the second element is at an 8-byte offset. assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 && "Invalid x86-64 argument pair!"); return Result; } ABIArgInfo X86_64ABIInfo:: classifyReturnType(QualType RetTy) const { // AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the // classification algorithm. X86_64ABIInfo::Class Lo, Hi; classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true); // Check some invariants. assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); llvm::Type *ResType = nullptr; switch (Lo) { case NoClass: if (Hi == NoClass) return ABIArgInfo::getIgnore(); // If the low part is just padding, it takes no register, leave ResType // null. assert((Hi == SSE || Hi == Integer || Hi == X87Up) && "Unknown missing lo part"); break; case SSEUp: case X87Up: llvm_unreachable("Invalid classification for lo word."); // AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via // hidden argument. case Memory: return getIndirectReturnResult(RetTy); // AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next // available register of the sequence %rax, %rdx is used. case Integer: ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0); // If we have a sign or zero extended integer, make sure to return Extend // so that the parameter gets the right LLVM IR attributes. 
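  // (For instance, a function returning 'short' classifies as INTEGER and
  // takes this path, so the result is ABIArgInfo::getExtend and the IR return
  // value carries a 'signext' attribute.)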
if (Hi == NoClass && isa(ResType)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); if (RetTy->isIntegralOrEnumerationType() && isPromotableIntegerTypeForABI(RetTy)) return ABIArgInfo::getExtend(RetTy); } break; // AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next // available SSE register of the sequence %xmm0, %xmm1 is used. case SSE: ResType = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0); break; // AMD64-ABI 3.2.3p4: Rule 6. If the class is X87, the value is // returned on the X87 stack in %st0 as 80-bit x87 number. case X87: ResType = llvm::Type::getX86_FP80Ty(getVMContext()); break; // AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real // part of the value is returned in %st0 and the imaginary part in // %st1. case ComplexX87: assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification."); ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()), llvm::Type::getX86_FP80Ty(getVMContext())); break; } llvm::Type *HighPart = nullptr; switch (Hi) { // Memory was handled previously and X87 should // never occur as a hi class. case Memory: case X87: llvm_unreachable("Invalid classification for hi word."); case ComplexX87: // Previously handled. case NoClass: break; case Integer: HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); if (Lo == NoClass) // Return HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); break; case SSE: HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); if (Lo == NoClass) // Return HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); break; // AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte // is passed in the next available eightbyte chunk if the last used // vector register. // // SSEUP should always be preceded by SSE, just widen. case SSEUp: assert(Lo == SSE && "Unexpected SSEUp classification."); ResType = GetByteVectorType(RetTy); break; // AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is // returned together with the previous X87 value in %st0. case X87Up: // If X87Up is preceded by X87, we don't need to do // anything. However, in some cases with unions it may not be // preceded by X87. In such situations we follow gcc and pass the // extra bits in an SSE reg. if (Lo != X87) { HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); if (Lo == NoClass) // Return HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); } break; } // If a high part was specified, merge it together with the low part. It is // known to pass in the high eightbyte of the result. We do this by forming a // first class struct aggregate with the high and low part: {low, high} if (HighPart) ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); return ABIArgInfo::getDirect(ResType); } ABIArgInfo X86_64ABIInfo::classifyArgumentType( QualType Ty, unsigned freeIntRegs, unsigned &neededInt, unsigned &neededSSE, bool isNamedArg) const { Ty = useFirstFieldIfTransparentUnion(Ty); X86_64ABIInfo::Class Lo, Hi; classify(Ty, 0, Lo, Hi, isNamedArg); // Check some invariants. // FIXME: Enforce these by construction. 
assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); neededInt = 0; neededSSE = 0; llvm::Type *ResType = nullptr; switch (Lo) { case NoClass: if (Hi == NoClass) return ABIArgInfo::getIgnore(); // If the low part is just padding, it takes no register, leave ResType // null. assert((Hi == SSE || Hi == Integer || Hi == X87Up) && "Unknown missing lo part"); break; // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument // on the stack. case Memory: // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or // COMPLEX_X87, it is passed in memory. case X87: case ComplexX87: if (getRecordArgABI(Ty, getCXXABI()) == CGCXXABI::RAA_Indirect) ++neededInt; return getIndirectResult(Ty, freeIntRegs); case SSEUp: case X87Up: llvm_unreachable("Invalid classification for lo word."); // AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next // available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8 // and %r9 is used. case Integer: ++neededInt; // Pick an 8-byte type based on the preferred type. ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 0, Ty, 0); // If we have a sign or zero extended integer, make sure to return Extend // so that the parameter gets the right LLVM IR attributes. if (Hi == NoClass && isa(ResType)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); if (Ty->isIntegralOrEnumerationType() && isPromotableIntegerTypeForABI(Ty)) return ABIArgInfo::getExtend(Ty); } break; // AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next // available SSE register is used, the registers are taken in the // order from %xmm0 to %xmm7. case SSE: { llvm::Type *IRType = CGT.ConvertType(Ty); ResType = GetSSETypeAtOffset(IRType, 0, Ty, 0); ++neededSSE; break; } } llvm::Type *HighPart = nullptr; switch (Hi) { // Memory was handled previously, ComplexX87 and X87 should // never occur as hi classes, and X87Up must be preceded by X87, // which is passed in memory. case Memory: case X87: case ComplexX87: llvm_unreachable("Invalid classification for hi word."); case NoClass: break; case Integer: ++neededInt; // Pick an 8-byte type based on the preferred type. HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); if (Lo == NoClass) // Pass HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); break; // X87Up generally doesn't occur here (long double is passed in // memory), except in situations involving unions. case X87Up: case SSE: HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); if (Lo == NoClass) // Pass HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); ++neededSSE; break; // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the // eightbyte is passed in the upper half of the last used SSE // register. This only happens when 128-bit vectors are passed. case SSEUp: assert(Lo == SSE && "Unexpected SSEUp classification"); ResType = GetByteVectorType(Ty); break; } // If a high part was specified, merge it together with the low part. It is // known to pass in the high eightbyte of the result. 
We do this by forming a // first class struct aggregate with the high and low part: {low, high} if (HighPart) ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); return ABIArgInfo::getDirect(ResType); } ABIArgInfo X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt, unsigned &NeededSSE) const { auto RT = Ty->getAs(); assert(RT && "classifyRegCallStructType only valid with struct types"); if (RT->getDecl()->hasFlexibleArrayMember()) return getIndirectReturnResult(Ty); // Sum up bases if (auto CXXRD = dyn_cast(RT->getDecl())) { if (CXXRD->isDynamicClass()) { NeededInt = NeededSSE = 0; return getIndirectReturnResult(Ty); } for (const auto &I : CXXRD->bases()) if (classifyRegCallStructTypeImpl(I.getType(), NeededInt, NeededSSE) .isIndirect()) { NeededInt = NeededSSE = 0; return getIndirectReturnResult(Ty); } } // Sum up members for (const auto *FD : RT->getDecl()->fields()) { if (FD->getType()->isRecordType() && !FD->getType()->isUnionType()) { if (classifyRegCallStructTypeImpl(FD->getType(), NeededInt, NeededSSE) .isIndirect()) { NeededInt = NeededSSE = 0; return getIndirectReturnResult(Ty); } } else { unsigned LocalNeededInt, LocalNeededSSE; if (classifyArgumentType(FD->getType(), UINT_MAX, LocalNeededInt, LocalNeededSSE, true) .isIndirect()) { NeededInt = NeededSSE = 0; return getIndirectReturnResult(Ty); } NeededInt += LocalNeededInt; NeededSSE += LocalNeededSSE; } } return ABIArgInfo::getDirect(); } ABIArgInfo X86_64ABIInfo::classifyRegCallStructType(QualType Ty, unsigned &NeededInt, unsigned &NeededSSE) const { NeededInt = 0; NeededSSE = 0; return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE); } void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { const unsigned CallingConv = FI.getCallingConvention(); // It is possible to force Win64 calling convention on any x86_64 target by // using __attribute__((ms_abi)). In such case to correctly emit Win64 // compatible code delegate this call to WinX86_64ABIInfo::computeInfo. if (CallingConv == llvm::CallingConv::Win64) { WinX86_64ABIInfo Win64ABIInfo(CGT, AVXLevel); Win64ABIInfo.computeInfo(FI); return; } bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall; // Keep track of the number of assigned registers. unsigned FreeIntRegs = IsRegCall ? 11 : 6; unsigned FreeSSERegs = IsRegCall ? 16 : 8; unsigned NeededInt, NeededSSE; if (!::classifyReturnType(getCXXABI(), FI, *this)) { if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && !FI.getReturnType()->getTypePtr()->isUnionType()) { FI.getReturnInfo() = classifyRegCallStructType(FI.getReturnType(), NeededInt, NeededSSE); if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { FreeIntRegs -= NeededInt; FreeSSERegs -= NeededSSE; } else { FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); } } else if (IsRegCall && FI.getReturnType()->getAs() && getContext().getCanonicalType(FI.getReturnType() ->getAs() ->getElementType()) == getContext().LongDoubleTy) // Complex Long Double Type is passed in Memory when Regcall // calling convention is used. FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); else FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); } // If the return value is indirect, then the hidden argument is consuming one // integer register. if (FI.getReturnInfo().isIndirect()) --FreeIntRegs; // The chain argument effectively gives us another free register. 
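// Rough worked example (assumed, for illustration only): under the standard
// SysV convention FreeIntRegs starts at 6 (%rdi, %rsi, %rdx, %rcx, %r8, %r9)
// and FreeSSERegs at 8 (%xmm0-%xmm7). A signature such as
//   struct Big { char buf[64]; };
//   struct Big f(int a, int b);
// returns indirectly, so the hidden sret pointer is expected to occupy %rdi
// and a/b to land in %rsi/%rdx.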
if (FI.isChainCall()) ++FreeIntRegs; unsigned NumRequiredArgs = FI.getNumRequiredArgs(); // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers // get assigned (in left-to-right order) for passing as follows... unsigned ArgNo = 0; for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); it != ie; ++it, ++ArgNo) { bool IsNamedArg = ArgNo < NumRequiredArgs; if (IsRegCall && it->type->isStructureOrClassType()) it->info = classifyRegCallStructType(it->type, NeededInt, NeededSSE); else it->info = classifyArgumentType(it->type, FreeIntRegs, NeededInt, NeededSSE, IsNamedArg); // AMD64-ABI 3.2.3p3: If there are no registers available for any // eightbyte of an argument, the whole argument is passed on the // stack. If registers have already been assigned for some // eightbytes of such an argument, the assignments get reverted. if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { FreeIntRegs -= NeededInt; FreeSSERegs -= NeededSSE; } else { it->info = getIndirectResult(it->type, FreeIntRegs); } } } static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) { Address overflow_arg_area_p = CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_p"); llvm::Value *overflow_arg_area = CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area"); // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16 // byte boundary if alignment needed by type exceeds 8 byte boundary. // It isn't stated explicitly in the standard, but in practice we use // alignment greater than 16 where necessary. CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty); if (Align > CharUnits::fromQuantity(8)) { overflow_arg_area = emitRoundPointerUpToAlignment(CGF, overflow_arg_area, Align); } // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area. llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); llvm::Value *Res = CGF.Builder.CreateBitCast(overflow_arg_area, llvm::PointerType::getUnqual(LTy)); // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to: // l->overflow_arg_area + sizeof(type). // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to // an 8 byte boundary. uint64_t SizeInBytes = (CGF.getContext().getTypeSize(Ty) + 7) / 8; llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, (SizeInBytes + 7) & ~7); overflow_arg_area = CGF.Builder.CreateGEP(overflow_arg_area, Offset, "overflow_arg_area.next"); CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p); // AMD64-ABI 3.5.7p5: Step 11. Return the fetched type. return Address(Res, Align); } Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { // Assume that va_list type is correct; should be pointer to LLVM type: // struct { // i32 gp_offset; // i32 fp_offset; // i8* overflow_arg_area; // i8* reg_save_area; // }; unsigned neededInt, neededSSE; Ty = getContext().getCanonicalType(Ty); ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE, /*isNamedArg*/false); // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed // in the registers. If not go to step 7. if (!neededInt && !neededSSE) return EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty); // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of // general purpose registers needed to pass type and num_fp to hold // the number of floating point registers needed. // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into // registers. 
In the case: l->gp_offset > 48 - num_gp * 8 or // l->fp_offset > 304 - num_fp * 16 go to step 7. // // NOTE: 304 is a typo, there are (6 * 8 + 8 * 16) = 176 bytes of // register save space). llvm::Value *InRegs = nullptr; Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid(); llvm::Value *gp_offset = nullptr, *fp_offset = nullptr; if (neededInt) { gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "gp_offset_p"); gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset"); InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8); InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp"); } if (neededSSE) { fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, "fp_offset_p"); fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset"); llvm::Value *FitsInFP = llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16); FitsInFP = CGF.Builder.CreateICmpULE(fp_offset, FitsInFP, "fits_in_fp"); InRegs = InRegs ? CGF.Builder.CreateAnd(InRegs, FitsInFP) : FitsInFP; } llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem"); llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock); // Emit code to load the value if it was passed in registers. CGF.EmitBlock(InRegBlock); // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with // an offset of l->gp_offset and/or l->fp_offset. This may require // copying to a temporary location in case the parameter is passed // in different register classes or requires an alignment greater // than 8 for general purpose registers and 16 for XMM registers. // // FIXME: This really results in shameful code when we end up needing to // collect arguments from different places; often what should result in a // simple assembling of a structure from scattered addresses has many more // loads than necessary. Can we clean this up? llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); llvm::Value *RegSaveArea = CGF.Builder.CreateLoad( CGF.Builder.CreateStructGEP(VAListAddr, 3), "reg_save_area"); Address RegAddr = Address::invalid(); if (neededInt && neededSSE) { // FIXME: Cleanup. assert(AI.isDirect() && "Unexpected ABI info for mixed regs"); llvm::StructType *ST = cast(AI.getCoerceToType()); Address Tmp = CGF.CreateMemTemp(Ty); Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST); assert(ST->getNumElements() == 2 && "Unexpected ABI info for mixed regs"); llvm::Type *TyLo = ST->getElementType(0); llvm::Type *TyHi = ST->getElementType(1); assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) && "Unexpected ABI info for mixed regs"); llvm::Type *PTyLo = llvm::PointerType::getUnqual(TyLo); llvm::Type *PTyHi = llvm::PointerType::getUnqual(TyHi); llvm::Value *GPAddr = CGF.Builder.CreateGEP(RegSaveArea, gp_offset); llvm::Value *FPAddr = CGF.Builder.CreateGEP(RegSaveArea, fp_offset); llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr; llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr; // Copy the first element. // FIXME: Our choice of alignment here and below is probably pessimistic. llvm::Value *V = CGF.Builder.CreateAlignedLoad( TyLo, CGF.Builder.CreateBitCast(RegLoAddr, PTyLo), CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(TyLo))); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0)); // Copy the second element. 
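// (Assumed example for this mixed-register path: a type such as
//   struct M { long n; double d; };
// coerces to { i64, double }, so one eightbyte is fetched from the GP save
// area at gp_offset and the other from the FP save area at fp_offset before
// both are reassembled in Tmp.)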
V = CGF.Builder.CreateAlignedLoad( TyHi, CGF.Builder.CreateBitCast(RegHiAddr, PTyHi), CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(TyHi))); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1)); RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy); } else if (neededInt) { RegAddr = Address(CGF.Builder.CreateGEP(RegSaveArea, gp_offset), CharUnits::fromQuantity(8)); RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy); // Copy to a temporary if necessary to ensure the appropriate alignment. std::pair SizeAlign = getContext().getTypeInfoInChars(Ty); uint64_t TySize = SizeAlign.first.getQuantity(); CharUnits TyAlign = SizeAlign.second; // Copy into a temporary if the type is more aligned than the // register save area. if (TyAlign.getQuantity() > 8) { Address Tmp = CGF.CreateMemTemp(Ty); CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false); RegAddr = Tmp; } } else if (neededSSE == 1) { RegAddr = Address(CGF.Builder.CreateGEP(RegSaveArea, fp_offset), CharUnits::fromQuantity(16)); RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy); } else { assert(neededSSE == 2 && "Invalid number of needed registers!"); // SSE registers are spaced 16 bytes apart in the register save // area, we need to collect the two eightbytes together. // The ABI isn't explicit about this, but it seems reasonable // to assume that the slots are 16-byte aligned, since the stack is // naturally 16-byte aligned and the prologue is expected to store // all the SSE registers to the RSA. Address RegAddrLo = Address(CGF.Builder.CreateGEP(RegSaveArea, fp_offset), CharUnits::fromQuantity(16)); Address RegAddrHi = CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo, CharUnits::fromQuantity(16)); llvm::Type *ST = AI.canHaveCoerceToType() ? AI.getCoerceToType() : llvm::StructType::get(CGF.DoubleTy, CGF.DoubleTy); llvm::Value *V; Address Tmp = CGF.CreateMemTemp(Ty); Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST); V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast( RegAddrLo, ST->getStructElementType(0))); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0)); V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast( RegAddrHi, ST->getStructElementType(1))); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1)); RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy); } // AMD64-ABI 3.5.7p5: Step 5. Set: // l->gp_offset = l->gp_offset + num_gp * 8 // l->fp_offset = l->fp_offset + num_fp * 16. if (neededInt) { llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededInt * 8); CGF.Builder.CreateStore(CGF.Builder.CreateAdd(gp_offset, Offset), gp_offset_p); } if (neededSSE) { llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededSSE * 16); CGF.Builder.CreateStore(CGF.Builder.CreateAdd(fp_offset, Offset), fp_offset_p); } CGF.EmitBranch(ContBlock); // Emit code to load the value if it was passed in memory. CGF.EmitBlock(InMemBlock); Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty); // Return the appropriate result. 
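// Minimal sketch of the expected control flow (assumed and abbreviated, not
// literal compiler output) for a va_arg needing a single GPR:
//   %fits_in_gp = icmp ule i32 %gp_offset, 40   ; 48 - 1 * 8
//   br i1 %fits_in_gp, label %vaarg.in_reg, label %vaarg.in_mem
//   ...
//   %vaarg.addr = phi i8* [ %reg, %vaarg.in_reg ], [ %mem, %vaarg.in_mem ]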
CGF.EmitBlock(ContBlock); Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock, "vaarg.addr"); return ResAddr; } Address X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, CGF.getContext().getTypeInfoInChars(Ty), CharUnits::fromQuantity(8), /*allowHigherAlign*/ false); } ABIArgInfo WinX86_64ABIInfo::reclassifyHvaArgType(QualType Ty, unsigned &FreeSSERegs, const ABIArgInfo ¤t) const { // Assumes vectorCall calling convention. const Type *Base = nullptr; uint64_t NumElts = 0; if (!Ty->isBuiltinType() && !Ty->isVectorType() && isHomogeneousAggregate(Ty, Base, NumElts) && FreeSSERegs >= NumElts) { FreeSSERegs -= NumElts; return getDirectX86Hva(); } return current; } ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType, bool IsVectorCall, bool IsRegCall) const { if (Ty->isVoidType()) return ABIArgInfo::getIgnore(); if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); TypeInfo Info = getContext().getTypeInfo(Ty); uint64_t Width = Info.Width; CharUnits Align = getContext().toCharUnitsFromBits(Info.Align); const RecordType *RT = Ty->getAs(); if (RT) { if (!IsReturnType) { if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); } if (RT->getDecl()->hasFlexibleArrayMember()) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } const Type *Base = nullptr; uint64_t NumElts = 0; // vectorcall adds the concept of a homogenous vector aggregate, similar to // other targets. if ((IsVectorCall || IsRegCall) && isHomogeneousAggregate(Ty, Base, NumElts)) { if (IsRegCall) { if (FreeSSERegs >= NumElts) { FreeSSERegs -= NumElts; if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType()) return ABIArgInfo::getDirect(); return ABIArgInfo::getExpand(); } return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); } else if (IsVectorCall) { if (FreeSSERegs >= NumElts && (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) { FreeSSERegs -= NumElts; return ABIArgInfo::getDirect(); } else if (IsReturnType) { return ABIArgInfo::getExpand(); } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) { // HVAs are delayed and reclassified in the 2nd step. return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); } } } if (Ty->isMemberPointerType()) { // If the member pointer is represented by an LLVM int or ptr, pass it // directly. llvm::Type *LLTy = CGT.ConvertType(Ty); if (LLTy->isPointerTy() || LLTy->isIntegerTy()) return ABIArgInfo::getDirect(); } if (RT || Ty->isAnyComplexType() || Ty->isMemberPointerType()) { // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is // not 1, 2, 4, or 8 bytes, must be passed by reference." if (Width > 64 || !llvm::isPowerOf2_64(Width)) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); // Otherwise, coerce it to a small integer. return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width)); } if (const BuiltinType *BT = Ty->getAs()) { switch (BT->getKind()) { case BuiltinType::Bool: // Bool type is always extended to the ABI, other builtin types are not // extended. return ABIArgInfo::getExtend(Ty); case BuiltinType::LongDouble: // Mingw64 GCC uses the old 80 bit extended precision floating point // unit. It passes them indirectly through memory. 
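// (Assumed clarification: on MSVC-style targets long double is the 64-bit
// IEEE double format, so the MinGW check below should not fire and the value
// falls through to the default direct handling.)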
if (IsMingw64) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); if (LDF == &llvm::APFloat::x87DoubleExtended()) return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); } break; case BuiltinType::Int128: case BuiltinType::UInt128: // If it's a parameter type, the normal ABI rule is that arguments larger // than 8 bytes are passed indirectly. GCC follows it. We follow it too, // even though it isn't particularly efficient. if (!IsReturnType) return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that. // Clang matches them for compatibility. return ABIArgInfo::getDirect(llvm::FixedVectorType::get( llvm::Type::getInt64Ty(getVMContext()), 2)); default: break; } } if (Ty->isExtIntType()) { // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is // not 1, 2, 4, or 8 bytes, must be passed by reference." // However, non-power-of-two _ExtInts will be passed as 1,2,4 or 8 bytes // anyway as long is it fits in them, so we don't have to check the power of // 2. if (Width <= 64) return ABIArgInfo::getDirect(); return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); } return ABIArgInfo::getDirect(); } void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, unsigned FreeSSERegs, bool IsVectorCall, bool IsRegCall) const { unsigned Count = 0; for (auto &I : FI.arguments()) { // Vectorcall in x64 only permits the first 6 arguments to be passed // as XMM/YMM registers. if (Count < VectorcallMaxParamNumAsReg) I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall); else { // Since these cannot be passed in registers, pretend no registers // are left. unsigned ZeroSSERegsAvail = 0; I.info = classify(I.type, /*FreeSSERegs=*/ZeroSSERegsAvail, false, IsVectorCall, IsRegCall); } ++Count; } for (auto &I : FI.arguments()) { I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info); } } void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { const unsigned CC = FI.getCallingConvention(); bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall; bool IsRegCall = CC == llvm::CallingConv::X86_RegCall; // If __attribute__((sysv_abi)) is in use, use the SysV argument // classification rules. if (CC == llvm::CallingConv::X86_64_SysV) { X86_64ABIInfo SysVABIInfo(CGT, AVXLevel); SysVABIInfo.computeInfo(FI); return; } unsigned FreeSSERegs = 0; if (IsVectorCall) { // We can use up to 4 SSE return registers with vectorcall. FreeSSERegs = 4; } else if (IsRegCall) { // RegCall gives us 16 SSE registers. FreeSSERegs = 16; } if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true, IsVectorCall, IsRegCall); if (IsVectorCall) { // We can use up to 6 SSE register parameters with vectorcall. FreeSSERegs = 6; } else if (IsRegCall) { // RegCall gives us 16 SSE registers, we can reuse the return registers. FreeSSERegs = 16; } if (IsVectorCall) { computeVectorCallArgs(FI, FreeSSERegs, IsVectorCall, IsRegCall); } else { for (auto &I : FI.arguments()) I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall); } } Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { bool IsIndirect = false; // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is // not 1, 2, 4, or 8 bytes, must be passed by reference." 
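// Assumed examples of how that rule plays out in the check below:
//   struct A { int x; int y; };          // 8 bytes, power of two -> by value
//   struct B { int x; int y; int z; };   // 12 bytes -> passed by reference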
if (isAggregateTypeForABI(Ty) || Ty->isMemberPointerType()) { uint64_t Width = getContext().getTypeSize(Ty); IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width); } return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, CGF.getContext().getTypeInfoInChars(Ty), CharUnits::fromQuantity(8), /*allowHigherAlign*/ false); } static bool PPC_initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address, bool Is64Bit, bool IsAIX) { // This is calculated from the LLVM and GCC tables and verified // against gcc output. AFAIK all PPC ABIs use the same encoding. CodeGen::CGBuilderTy &Builder = CGF.Builder; llvm::IntegerType *i8 = CGF.Int8Ty; llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4); llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8); llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16); // 0-31: r0-31, the 4-byte or 8-byte general-purpose registers AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 0, 31); // 32-63: fp0-31, the 8-byte floating-point registers AssignToArrayRange(Builder, Address, Eight8, 32, 63); // 64-67 are various 4-byte or 8-byte special-purpose registers: // 64: mq // 65: lr // 66: ctr // 67: ap AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 64, 67); // 68-76 are various 4-byte special-purpose registers: // 68-75 cr0-7 // 76: xer AssignToArrayRange(Builder, Address, Four8, 68, 76); // 77-108: v0-31, the 16-byte vector registers AssignToArrayRange(Builder, Address, Sixteen8, 77, 108); // 109: vrsave // 110: vscr AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 109, 110); // AIX does not utilize the rest of the registers. if (IsAIX) return false; // 111: spe_acc // 112: spefscr // 113: sfp AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 111, 113); if (!Is64Bit) return false; // TODO: Need to verify if these registers are used on 64 bit AIX with Power8 // or above CPU. // 64-bit only registers: // 114: tfhar // 115: tfiar // 116: texasr AssignToArrayRange(Builder, Address, Eight8, 114, 116); return false; } // AIX namespace { /// AIXABIInfo - The AIX XCOFF ABI information. class AIXABIInfo : public ABIInfo { const bool Is64Bit; const unsigned PtrByteSize; CharUnits getParamTypeAlignment(QualType Ty) const; public: AIXABIInfo(CodeGen::CodeGenTypes &CGT, bool Is64Bit) : ABIInfo(CGT), Is64Bit(Is64Bit), PtrByteSize(Is64Bit ? 8 : 4) {} bool isPromotableTypeForABI(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty) const; void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type); } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class AIXTargetCodeGenInfo : public TargetCodeGenInfo { const bool Is64Bit; public: AIXTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool Is64Bit) : TargetCodeGenInfo(std::make_unique(CGT, Is64Bit)), Is64Bit(Is64Bit) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 1; // r1 is the dedicated stack pointer } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; } // namespace // Return true if the ABI requires Ty to be passed sign- or zero- // extended to 32/64 bits. bool AIXABIInfo::isPromotableTypeForABI(QualType Ty) const { // Treat an enum type as its underlying type. 
if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); // Promotable integer types are required to be promoted by the ABI. if (Ty->isPromotableIntegerType()) return true; if (!Is64Bit) return false; // For 64 bit mode, in addition to the usual promotable integer types, we also // need to extend all 32-bit types, since the ABI requires promotion to 64 // bits. if (const BuiltinType *BT = Ty->getAs()) switch (BT->getKind()) { case BuiltinType::Int: case BuiltinType::UInt: return true; default: break; } return false; } ABIArgInfo AIXABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isAnyComplexType()) llvm::report_fatal_error("complex type is not supported on AIX yet"); if (RetTy->isVectorType()) llvm::report_fatal_error("vector type is not supported on AIX yet"); if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // TODO: Evaluate if AIX power alignment rule would have an impact on the // alignment here. if (isAggregateTypeForABI(RetTy)) return getNaturalAlignIndirect(RetTy); return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) : ABIArgInfo::getDirect()); } ABIArgInfo AIXABIInfo::classifyArgumentType(QualType Ty) const { Ty = useFirstFieldIfTransparentUnion(Ty); if (Ty->isAnyComplexType()) llvm::report_fatal_error("complex type is not supported on AIX yet"); if (Ty->isVectorType()) llvm::report_fatal_error("vector type is not supported on AIX yet"); // TODO: Evaluate if AIX power alignment rule would have an impact on the // alignment here. if (isAggregateTypeForABI(Ty)) { // Records with non-trivial destructors/copy-constructors should not be // passed by value. if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); CharUnits CCAlign = getParamTypeAlignment(Ty); CharUnits TyAlign = getContext().getTypeAlignInChars(Ty); return ABIArgInfo::getIndirect(CCAlign, /*ByVal*/ true, /*Realign*/ TyAlign > CCAlign); } return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) : ABIArgInfo::getDirect()); } CharUnits AIXABIInfo::getParamTypeAlignment(QualType Ty) const { if (Ty->isAnyComplexType()) llvm::report_fatal_error("complex type is not supported on AIX yet"); if (Ty->isVectorType()) llvm::report_fatal_error("vector type is not supported on AIX yet"); // If the structure contains a vector type, the alignment is 16. if (isRecordWithSIMDVectorType(getContext(), Ty)) return CharUnits::fromQuantity(16); return CharUnits::fromQuantity(PtrByteSize); } Address AIXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { if (Ty->isAnyComplexType()) llvm::report_fatal_error("complex type is not supported on AIX yet"); if (Ty->isVectorType()) llvm::report_fatal_error("vector type is not supported on AIX yet"); auto TypeInfo = getContext().getTypeInfoInChars(Ty); TypeInfo.second = getParamTypeAlignment(Ty); CharUnits SlotSize = CharUnits::fromQuantity(PtrByteSize); return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo, SlotSize, /*AllowHigher*/ true); } bool AIXTargetCodeGenInfo::initDwarfEHRegSizeTable( CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { return PPC_initDwarfEHRegSizeTable(CGF, Address, Is64Bit, /*IsAIX*/ true); } // PowerPC-32 namespace { /// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information. 
class PPC32_SVR4_ABIInfo : public DefaultABIInfo { bool IsSoftFloatABI; bool IsRetSmallStructInRegABI; CharUnits getParamTypeAlignment(QualType Ty) const; public: PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI, bool RetSmallStructInRegABI) : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI), IsRetSmallStructInRegABI(RetSmallStructInRegABI) {} ABIArgInfo classifyReturnType(QualType RetTy) const; void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type); } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class PPC32TargetCodeGenInfo : public TargetCodeGenInfo { public: PPC32TargetCodeGenInfo(CodeGenTypes &CGT, bool SoftFloatABI, bool RetSmallStructInRegABI) : TargetCodeGenInfo(std::make_unique( CGT, SoftFloatABI, RetSmallStructInRegABI)) {} static bool isStructReturnInRegABI(const llvm::Triple &Triple, const CodeGenOptions &Opts); int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. return 1; // r1 is the dedicated stack pointer } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; } CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const { // Complex types are passed just like their elements. if (const ComplexType *CTy = Ty->getAs()) Ty = CTy->getElementType(); if (Ty->isVectorType()) return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 : 4); // For single-element float/vector structs, we consider the whole type // to have the same alignment requirements as its single element. const Type *AlignTy = nullptr; if (const Type *EltType = isSingleElementStruct(Ty, getContext())) { const BuiltinType *BT = EltType->getAs(); if ((EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) || (BT && BT->isFloatingPoint())) AlignTy = EltType; } if (AlignTy) return CharUnits::fromQuantity(AlignTy->isVectorType() ? 16 : 4); return CharUnits::fromQuantity(4); } ABIArgInfo PPC32_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { uint64_t Size; // -msvr4-struct-return puts small aggregates in GPR3 and GPR4. if (isAggregateTypeForABI(RetTy) && IsRetSmallStructInRegABI && (Size = getContext().getTypeSize(RetTy)) <= 64) { // System V ABI (1995), page 3-22, specified: // > A structure or union whose size is less than or equal to 8 bytes // > shall be returned in r3 and r4, as if it were first stored in the // > 8-byte aligned memory area and then the low addressed word were // > loaded into r3 and the high-addressed word into r4. Bits beyond // > the last member of the structure or union are not defined. // // GCC for big-endian PPC32 inserts the pad before the first member, // not "beyond the last member" of the struct. To stay compatible // with GCC, we coerce the struct to an integer of the same size. // LLVM will extend it and return i32 in r3, or i64 in r3:r4. if (Size == 0) return ABIArgInfo::getIgnore(); else { llvm::Type *CoerceTy = llvm::Type::getIntNTy(getVMContext(), Size); return ABIArgInfo::getDirect(CoerceTy); } } return DefaultABIInfo::classifyReturnType(RetTy); } // TODO: this implementation is now likely redundant with // DefaultABIInfo::EmitVAArg. 
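// Informal summary (assumed, not normative) of the scheme relied on by the
// va_arg lowering below: up to 8 GPRs (r3-r10) and 8 FPRs (f1-f8) are spilled
// to the register save area, with the FPR slots starting 32 bytes in; the
// gpr/fpr fields of __va_list_tag count how many of those registers have been
// consumed, and once a counter reaches OverflowLimit (8) the argument is
// taken from the overflow area instead.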
Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, QualType Ty) const { if (getTarget().getTriple().isOSDarwin()) { auto TI = getContext().getTypeInfoInChars(Ty); TI.second = getParamTypeAlignment(Ty); CharUnits SlotSize = CharUnits::fromQuantity(4); return emitVoidPtrVAArg(CGF, VAList, Ty, classifyArgumentType(Ty).isIndirect(), TI, SlotSize, /*AllowHigherAlign=*/true); } const unsigned OverflowLimit = 8; if (const ComplexType *CTy = Ty->getAs()) { // TODO: Implement this. For now ignore. (void)CTy; return Address::invalid(); // FIXME? } // struct __va_list_tag { // unsigned char gpr; // unsigned char fpr; // unsigned short reserved; // void *overflow_arg_area; // void *reg_save_area; // }; bool isI64 = Ty->isIntegerType() && getContext().getTypeSize(Ty) == 64; bool isInt = Ty->isIntegerType() || Ty->isPointerType() || Ty->isAggregateType(); bool isF64 = Ty->isFloatingType() && getContext().getTypeSize(Ty) == 64; // All aggregates are passed indirectly? That doesn't seem consistent // with the argument-lowering code. bool isIndirect = Ty->isAggregateType(); CGBuilderTy &Builder = CGF.Builder; // The calling convention either uses 1-2 GPRs or 1 FPR. Address NumRegsAddr = Address::invalid(); if (isInt || IsSoftFloatABI) { NumRegsAddr = Builder.CreateStructGEP(VAList, 0, "gpr"); } else { NumRegsAddr = Builder.CreateStructGEP(VAList, 1, "fpr"); } llvm::Value *NumRegs = Builder.CreateLoad(NumRegsAddr, "numUsedRegs"); // "Align" the register count when TY is i64. if (isI64 || (isF64 && IsSoftFloatABI)) { NumRegs = Builder.CreateAdd(NumRegs, Builder.getInt8(1)); NumRegs = Builder.CreateAnd(NumRegs, Builder.getInt8((uint8_t) ~1U)); } llvm::Value *CC = Builder.CreateICmpULT(NumRegs, Builder.getInt8(OverflowLimit), "cond"); llvm::BasicBlock *UsingRegs = CGF.createBasicBlock("using_regs"); llvm::BasicBlock *UsingOverflow = CGF.createBasicBlock("using_overflow"); llvm::BasicBlock *Cont = CGF.createBasicBlock("cont"); Builder.CreateCondBr(CC, UsingRegs, UsingOverflow); llvm::Type *DirectTy = CGF.ConvertType(Ty); if (isIndirect) DirectTy = DirectTy->getPointerTo(0); // Case 1: consume registers. Address RegAddr = Address::invalid(); { CGF.EmitBlock(UsingRegs); Address RegSaveAreaPtr = Builder.CreateStructGEP(VAList, 4); RegAddr = Address(Builder.CreateLoad(RegSaveAreaPtr), CharUnits::fromQuantity(8)); assert(RegAddr.getElementType() == CGF.Int8Ty); // Floating-point registers start after the general-purpose registers. if (!(isInt || IsSoftFloatABI)) { RegAddr = Builder.CreateConstInBoundsByteGEP(RegAddr, CharUnits::fromQuantity(32)); } // Get the address of the saved value by scaling the number of // registers we've used by the number of CharUnits RegSize = CharUnits::fromQuantity((isInt || IsSoftFloatABI) ? 4 : 8); llvm::Value *RegOffset = Builder.CreateMul(NumRegs, Builder.getInt8(RegSize.getQuantity())); RegAddr = Address(Builder.CreateInBoundsGEP(CGF.Int8Ty, RegAddr.getPointer(), RegOffset), RegAddr.getAlignment().alignmentOfArrayElement(RegSize)); RegAddr = Builder.CreateElementBitCast(RegAddr, DirectTy); // Increase the used-register count. NumRegs = Builder.CreateAdd(NumRegs, Builder.getInt8((isI64 || (isF64 && IsSoftFloatABI)) ? 2 : 1)); Builder.CreateStore(NumRegs, NumRegsAddr); CGF.EmitBranch(Cont); } // Case 2: consume space in the overflow area. Address MemAddr = Address::invalid(); { CGF.EmitBlock(UsingOverflow); Builder.CreateStore(Builder.getInt8(OverflowLimit), NumRegsAddr); // Everything in the overflow area is rounded up to a size of at least 4. 
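// (Assumed example of the two round-ups handled next: an 8-byte-aligned
// double has its starting address in the overflow area rounded up to 8,
// while anything smaller than a word still consumes at least a 4-byte slot.)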
CharUnits OverflowAreaAlign = CharUnits::fromQuantity(4); CharUnits Size; if (!isIndirect) { auto TypeInfo = CGF.getContext().getTypeInfoInChars(Ty); Size = TypeInfo.first.alignTo(OverflowAreaAlign); } else { Size = CGF.getPointerSize(); } Address OverflowAreaAddr = Builder.CreateStructGEP(VAList, 3); Address OverflowArea(Builder.CreateLoad(OverflowAreaAddr, "argp.cur"), OverflowAreaAlign); // Round up address of argument to alignment CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty); if (Align > OverflowAreaAlign) { llvm::Value *Ptr = OverflowArea.getPointer(); OverflowArea = Address(emitRoundPointerUpToAlignment(CGF, Ptr, Align), Align); } MemAddr = Builder.CreateElementBitCast(OverflowArea, DirectTy); // Increase the overflow area. OverflowArea = Builder.CreateConstInBoundsByteGEP(OverflowArea, Size); Builder.CreateStore(OverflowArea.getPointer(), OverflowAreaAddr); CGF.EmitBranch(Cont); } CGF.EmitBlock(Cont); // Merge the cases with a phi. Address Result = emitMergePHI(CGF, RegAddr, UsingRegs, MemAddr, UsingOverflow, "vaarg.addr"); // Load the pointer if the argument was passed indirectly. if (isIndirect) { Result = Address(Builder.CreateLoad(Result, "aggr"), getContext().getTypeAlignInChars(Ty)); } return Result; } bool PPC32TargetCodeGenInfo::isStructReturnInRegABI( const llvm::Triple &Triple, const CodeGenOptions &Opts) { assert(Triple.getArch() == llvm::Triple::ppc); switch (Opts.getStructReturnConvention()) { case CodeGenOptions::SRCK_Default: break; case CodeGenOptions::SRCK_OnStack: // -maix-struct-return return false; case CodeGenOptions::SRCK_InRegs: // -msvr4-struct-return return true; } if (Triple.isOSBinFormatELF() && !Triple.isOSLinux()) return true; return false; } bool PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ false, /*IsAIX*/ false); } // PowerPC-64 namespace { /// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information. class PPC64_SVR4_ABIInfo : public SwiftABIInfo { public: enum ABIKind { ELFv1 = 0, ELFv2 }; private: static const unsigned GPRBits = 64; ABIKind Kind; bool HasQPX; bool IsSoftFloatABI; // A vector of float or double will be promoted to <4 x f32> or <4 x f64> and // will be passed in a QPX register. bool IsQPXVectorTy(const Type *Ty) const { if (!HasQPX) return false; if (const VectorType *VT = Ty->getAs()) { unsigned NumElements = VT->getNumElements(); if (NumElements == 1) return false; if (VT->getElementType()->isSpecificBuiltinType(BuiltinType::Double)) { if (getContext().getTypeSize(Ty) <= 256) return true; } else if (VT->getElementType()-> isSpecificBuiltinType(BuiltinType::Float)) { if (getContext().getTypeSize(Ty) <= 128) return true; } } return false; } bool IsQPXVectorTy(QualType Ty) const { return IsQPXVectorTy(Ty.getTypePtr()); } public: PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX, bool SoftFloatABI) : SwiftABIInfo(CGT), Kind(Kind), HasQPX(HasQPX), IsSoftFloatABI(SoftFloatABI) {} bool isPromotableTypeForABI(QualType Ty) const; CharUnits getParamTypeAlignment(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t Members) const override; // TODO: We can add more logic to computeInfo to improve performance. 
// Example: For aggregate arguments that fit in a register, we could // use getDirectInReg (as is done below for structs containing a single // floating-point value) to avoid pushing them to memory on function // entry. This would require changing the logic in PPCISelLowering // when lowering the parameters in the caller and args in the callee. void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) { // We rely on the default argument classification for the most part. // One exception: An aggregate containing a single floating-point // or vector item must be passed in a register if one is available. const Type *T = isSingleElementStruct(I.type, getContext()); if (T) { const BuiltinType *BT = T->getAs(); if (IsQPXVectorTy(T) || (T->isVectorType() && getContext().getTypeSize(T) == 128) || (BT && BT->isFloatingPoint())) { QualType QT(T, 0); I.info = ABIArgInfo::getDirectInReg(CGT.ConvertType(QT)); continue; } } I.info = classifyArgumentType(I.type); } } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; bool shouldPassIndirectlyForSwift(ArrayRef scalars, bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } bool isSwiftErrorInRegister() const override { return false; } }; class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo { public: PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT, PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX, bool SoftFloatABI) : TargetCodeGenInfo(std::make_unique( CGT, Kind, HasQPX, SoftFloatABI)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. return 1; // r1 is the dedicated stack pointer } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; class PPC64TargetCodeGenInfo : public DefaultTargetCodeGenInfo { public: PPC64TargetCodeGenInfo(CodeGenTypes &CGT) : DefaultTargetCodeGenInfo(CGT) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. return 1; // r1 is the dedicated stack pointer } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; } // Return true if the ABI requires Ty to be passed sign- or zero- // extended to 64 bits. bool PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); // Promotable integer types are required to be promoted by the ABI. if (isPromotableIntegerTypeForABI(Ty)) return true; // In addition to the usual promotable integer types, we also need to // extend all 32-bit types, since the ABI requires promotion to 64 bits. if (const BuiltinType *BT = Ty->getAs()) switch (BT->getKind()) { case BuiltinType::Int: case BuiltinType::UInt: return true; default: break; } if (const auto *EIT = Ty->getAs()) if (EIT->getNumBits() < 64) return true; return false; } /// isAlignedParamType - Determine whether a type requires 16-byte or /// higher alignment in the parameter area. Always returns at least 8. CharUnits PPC64_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const { // Complex types are passed just like their elements. 
if (const ComplexType *CTy = Ty->getAs()) Ty = CTy->getElementType(); // Only vector types of size 16 bytes need alignment (larger types are // passed via reference, smaller types are not aligned). if (IsQPXVectorTy(Ty)) { if (getContext().getTypeSize(Ty) > 128) return CharUnits::fromQuantity(32); return CharUnits::fromQuantity(16); } else if (Ty->isVectorType()) { return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 : 8); } // For single-element float/vector structs, we consider the whole type // to have the same alignment requirements as its single element. const Type *AlignAsType = nullptr; const Type *EltType = isSingleElementStruct(Ty, getContext()); if (EltType) { const BuiltinType *BT = EltType->getAs(); if (IsQPXVectorTy(EltType) || (EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) || (BT && BT->isFloatingPoint())) AlignAsType = EltType; } // Likewise for ELFv2 homogeneous aggregates. const Type *Base = nullptr; uint64_t Members = 0; if (!AlignAsType && Kind == ELFv2 && isAggregateTypeForABI(Ty) && isHomogeneousAggregate(Ty, Base, Members)) AlignAsType = Base; // With special case aggregates, only vector base types need alignment. if (AlignAsType && IsQPXVectorTy(AlignAsType)) { if (getContext().getTypeSize(AlignAsType) > 128) return CharUnits::fromQuantity(32); return CharUnits::fromQuantity(16); } else if (AlignAsType) { return CharUnits::fromQuantity(AlignAsType->isVectorType() ? 16 : 8); } // Otherwise, we only need alignment for any aggregate type that // has an alignment requirement of >= 16 bytes. if (isAggregateTypeForABI(Ty) && getContext().getTypeAlign(Ty) >= 128) { if (HasQPX && getContext().getTypeAlign(Ty) >= 256) return CharUnits::fromQuantity(32); return CharUnits::fromQuantity(16); } return CharUnits::fromQuantity(8); } /// isHomogeneousAggregate - Return true if a type is an ELFv2 homogeneous /// aggregate. Base is set to the base element type, and Members is set /// to the number of base elements. bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base, uint64_t &Members) const { if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { uint64_t NElements = AT->getSize().getZExtValue(); if (NElements == 0) return false; if (!isHomogeneousAggregate(AT->getElementType(), Base, Members)) return false; Members *= NElements; } else if (const RecordType *RT = Ty->getAs()) { const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return false; Members = 0; // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { for (const auto &I : CXXRD->bases()) { // Ignore empty records. if (isEmptyRecord(getContext(), I.getType(), true)) continue; uint64_t FldMembers; if (!isHomogeneousAggregate(I.getType(), Base, FldMembers)) return false; Members += FldMembers; } } for (const auto *FD : RD->fields()) { // Ignore (non-zero arrays of) empty records. QualType FT = FD->getType(); while (const ConstantArrayType *AT = getContext().getAsConstantArrayType(FT)) { if (AT->getSize().getZExtValue() == 0) return false; FT = AT->getElementType(); } if (isEmptyRecord(getContext(), FT, true)) continue; // For compatibility with GCC, ignore empty bitfields in C++ mode. if (getContext().getLangOpts().CPlusPlus && FD->isZeroLengthBitField(getContext())) continue; uint64_t FldMembers; if (!isHomogeneousAggregate(FD->getType(), Base, FldMembers)) return false; Members = (RD->isUnion() ? 
std::max(Members, FldMembers) : Members + FldMembers); } if (!Base) return false; // Ensure there is no padding. if (getContext().getTypeSize(Base) * Members != getContext().getTypeSize(Ty)) return false; } else { Members = 1; if (const ComplexType *CT = Ty->getAs()) { Members = 2; Ty = CT->getElementType(); } // Most ABIs only support float, double, and some vector type widths. if (!isHomogeneousAggregateBaseType(Ty)) return false; // The base type must be the same for all members. Types that // agree in both total size and mode (float vs. vector) are // treated as being equivalent here. const Type *TyPtr = Ty.getTypePtr(); if (!Base) { Base = TyPtr; // If it's a non-power-of-2 vector, its size is already a power-of-2, // so make sure to widen it explicitly. if (const VectorType *VT = Base->getAs()) { QualType EltTy = VT->getElementType(); unsigned NumElements = getContext().getTypeSize(VT) / getContext().getTypeSize(EltTy); Base = getContext() .getVectorType(EltTy, NumElements, VT->getVectorKind()) .getTypePtr(); } } if (Base->isVectorType() != TyPtr->isVectorType() || getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr)) return false; } return Members > 0 && isHomogeneousAggregateSmallEnough(Base, Members); } bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for ELFv2 must have base types of float, // double, long double, or 128-bit vectors. if (const BuiltinType *BT = Ty->getAs()) { if (BT->getKind() == BuiltinType::Float || BT->getKind() == BuiltinType::Double || BT->getKind() == BuiltinType::LongDouble || (getContext().getTargetInfo().hasFloat128Type() && (BT->getKind() == BuiltinType::Float128))) { if (IsSoftFloatABI) return false; return true; } } if (const VectorType *VT = Ty->getAs()) { if (getContext().getTypeSize(VT) == 128 || IsQPXVectorTy(Ty)) return true; } return false; } bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateSmallEnough( const Type *Base, uint64_t Members) const { // Vector and fp128 types require one register, other floating point types // require one or two registers depending on their size. uint32_t NumRegs = ((getContext().getTargetInfo().hasFloat128Type() && Base->isFloat128Type()) || Base->isVectorType()) ? 1 : (getContext().getTypeSize(Base) + 63) / 64; // Homogeneous Aggregates may occupy at most 8 registers. return Members * NumRegs <= 8; } ABIArgInfo PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const { Ty = useFirstFieldIfTransparentUnion(Ty); if (Ty->isAnyComplexType()) return ABIArgInfo::getDirect(); // Non-Altivec vector types are passed in GPRs (smaller than 16 bytes) // or via reference (larger than 16 bytes). if (Ty->isVectorType() && !IsQPXVectorTy(Ty)) { uint64_t Size = getContext().getTypeSize(Ty); if (Size > 128) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); else if (Size < 128) { llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size); return ABIArgInfo::getDirect(CoerceTy); } } if (const auto *EIT = Ty->getAs()) if (EIT->getNumBits() > 128) return getNaturalAlignIndirect(Ty, /*ByVal=*/true); if (isAggregateTypeForABI(Ty)) { if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); uint64_t ABIAlign = getParamTypeAlignment(Ty).getQuantity(); uint64_t TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity(); // ELFv2 homogeneous aggregates are passed as array types. 
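// An assumed example of such an aggregate:
//   struct V4 { float x, y, z, w; };
// would be coerced to [4 x float] here and passed in four consecutive
// floating-point registers while parameter registers remain available.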
const Type *Base = nullptr; uint64_t Members = 0; if (Kind == ELFv2 && isHomogeneousAggregate(Ty, Base, Members)) { llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0)); llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members); return ABIArgInfo::getDirect(CoerceTy); } // If an aggregate may end up fully in registers, we do not // use the ByVal method, but pass the aggregate as array. // This is usually beneficial since we avoid forcing the // back-end to store the argument to memory. uint64_t Bits = getContext().getTypeSize(Ty); if (Bits > 0 && Bits <= 8 * GPRBits) { llvm::Type *CoerceTy; // Types up to 8 bytes are passed as integer type (which will be // properly aligned in the argument save area doubleword). if (Bits <= GPRBits) CoerceTy = llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); // Larger types are passed as arrays, with the base type selected // according to the required alignment in the save area. else { uint64_t RegBits = ABIAlign * 8; uint64_t NumRegs = llvm::alignTo(Bits, RegBits) / RegBits; llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), RegBits); CoerceTy = llvm::ArrayType::get(RegTy, NumRegs); } return ABIArgInfo::getDirect(CoerceTy); } // All other aggregates are passed ByVal. return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign), /*ByVal=*/true, /*Realign=*/TyAlign > ABIAlign); } return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) : ABIArgInfo::getDirect()); } ABIArgInfo PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); if (RetTy->isAnyComplexType()) return ABIArgInfo::getDirect(); // Non-Altivec vector types are returned in GPRs (smaller than 16 bytes) // or via reference (larger than 16 bytes). if (RetTy->isVectorType() && !IsQPXVectorTy(RetTy)) { uint64_t Size = getContext().getTypeSize(RetTy); if (Size > 128) return getNaturalAlignIndirect(RetTy); else if (Size < 128) { llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size); return ABIArgInfo::getDirect(CoerceTy); } } if (const auto *EIT = RetTy->getAs()) if (EIT->getNumBits() > 128) return getNaturalAlignIndirect(RetTy, /*ByVal=*/false); if (isAggregateTypeForABI(RetTy)) { // ELFv2 homogeneous aggregates are returned as array types. const Type *Base = nullptr; uint64_t Members = 0; if (Kind == ELFv2 && isHomogeneousAggregate(RetTy, Base, Members)) { llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0)); llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members); return ABIArgInfo::getDirect(CoerceTy); } // ELFv2 small aggregates are returned in up to two registers. uint64_t Bits = getContext().getTypeSize(RetTy); if (Kind == ELFv2 && Bits <= 2 * GPRBits) { if (Bits == 0) return ABIArgInfo::getIgnore(); llvm::Type *CoerceTy; if (Bits > GPRBits) { CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits); CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy); } else CoerceTy = llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); return ABIArgInfo::getDirect(CoerceTy); } // All other aggregates are returned indirectly. return getNaturalAlignIndirect(RetTy); } return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) : ABIArgInfo::getDirect()); } // Based on ARMABIInfo::EmitVAArg, adjusted for 64-bit machine. 
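// Assumed illustration of the complex-type handling inside this routine: for
// va_arg(ap, _Complex float) on big-endian ELFv2 each 4-byte part sits
// right-adjusted in its own 8-byte doubleword, so the real part is expected
// at offset 4 and the imaginary part at offset 12 before both are packed into
// a temporary matching Clang's tightly packed complex layout.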
Address PPC64_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { auto TypeInfo = getContext().getTypeInfoInChars(Ty); TypeInfo.second = getParamTypeAlignment(Ty); CharUnits SlotSize = CharUnits::fromQuantity(8); // If we have a complex type and the base type is smaller than 8 bytes, // the ABI calls for the real and imaginary parts to be right-adjusted // in separate doublewords. However, Clang expects us to produce a // pointer to a structure with the two parts packed tightly. So generate // loads of the real and imaginary parts relative to the va_list pointer, // and store them to a temporary structure. if (const ComplexType *CTy = Ty->getAs()) { CharUnits EltSize = TypeInfo.first / 2; if (EltSize < SlotSize) { Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, CGF.Int8Ty, SlotSize * 2, SlotSize, SlotSize, /*AllowHigher*/ true); Address RealAddr = Addr; Address ImagAddr = RealAddr; if (CGF.CGM.getDataLayout().isBigEndian()) { RealAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize - EltSize); ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(ImagAddr, 2 * SlotSize - EltSize); } else { ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize); } llvm::Type *EltTy = CGF.ConvertTypeForMem(CTy->getElementType()); RealAddr = CGF.Builder.CreateElementBitCast(RealAddr, EltTy); ImagAddr = CGF.Builder.CreateElementBitCast(ImagAddr, EltTy); llvm::Value *Real = CGF.Builder.CreateLoad(RealAddr, ".vareal"); llvm::Value *Imag = CGF.Builder.CreateLoad(ImagAddr, ".vaimag"); Address Temp = CGF.CreateMemTemp(Ty, "vacplx"); CGF.EmitStoreOfComplex({Real, Imag}, CGF.MakeAddrLValue(Temp, Ty), /*init*/ true); return Temp; } } // Otherwise, just use the general rule. return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo, SlotSize, /*AllowHigher*/ true); } bool PPC64_SVR4_TargetCodeGenInfo::initDwarfEHRegSizeTable( CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ true, /*IsAIX*/ false); } bool PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ true, /*IsAIX*/ false); } //===----------------------------------------------------------------------===// // AArch64 ABI Implementation //===----------------------------------------------------------------------===// namespace { class AArch64ABIInfo : public SwiftABIInfo { public: enum ABIKind { AAPCS = 0, DarwinPCS, Win64 }; private: ABIKind Kind; public: AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) : SwiftABIInfo(CGT), Kind(Kind) {} private: ABIKind getABIKind() const { return Kind; } bool isDarwinPCS() const { return Kind == DarwinPCS; } ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadic) const; ABIArgInfo classifyArgumentType(QualType RetTy) const; ABIArgInfo coerceIllegalVector(QualType Ty) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t Members) const override; bool isIllegalVectorType(QualType Ty) const; void computeInfo(CGFunctionInfo &FI) const override { if (!::classifyReturnType(getCXXABI(), FI, *this)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic()); for (auto &it : FI.arguments()) it.info = classifyArgumentType(it.type); } Address EmitDarwinVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF) const; Address EmitAAPCSVAArg(Address VAListAddr, QualType 
Ty, CodeGenFunction &CGF) const; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override { return Kind == Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty) : isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF) : EmitAAPCSVAArg(VAListAddr, Ty, CGF); } Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; bool shouldPassIndirectlyForSwift(ArrayRef scalars, bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } bool isSwiftErrorInRegister() const override { return true; } bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy, unsigned elts) const override; bool allowBFloatArgsAndRet() const override { return getTarget().hasBFloat16Type(); } }; class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { public: AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind Kind) : TargetCodeGenInfo(std::make_unique(CGT, Kind)) {} StringRef getARCRetainAutoreleasedReturnValueMarker() const override { return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue"; } int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 31; } bool doesReturnSlotInterfereWithArgs() const override { return false; } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; - LangOptions::SignReturnAddressScopeKind Scope = - CGM.getLangOpts().getSignReturnAddressScope(); - LangOptions::SignReturnAddressKeyKind Key = - CGM.getLangOpts().getSignReturnAddressKey(); - bool BranchTargetEnforcement = CGM.getLangOpts().BranchTargetEnforcement; - if (const auto *TA = FD->getAttr()) { - ParsedTargetAttr Attr = TA->parse(); - if (!Attr.BranchProtection.empty()) { - TargetInfo::BranchProtectionInfo BPI; - StringRef Error; - (void)CGM.getTarget().validateBranchProtection(Attr.BranchProtection, - BPI, Error); - assert(Error.empty()); - Scope = BPI.SignReturnAddr; - Key = BPI.SignKey; - BranchTargetEnforcement = BPI.BranchTargetEnforcement; - } - } + const auto *TA = FD->getAttr(); + if (TA == nullptr) + return; + + ParsedTargetAttr Attr = TA->parse(); + if (Attr.BranchProtection.empty()) + return; + + TargetInfo::BranchProtectionInfo BPI; + StringRef Error; + (void)CGM.getTarget().validateBranchProtection(Attr.BranchProtection, + BPI, Error); + assert(Error.empty()); auto *Fn = cast(GV); - if (Scope != LangOptions::SignReturnAddressScopeKind::None) { - Fn->addFnAttr("sign-return-address", - Scope == LangOptions::SignReturnAddressScopeKind::All - ? "all" - : "non-leaf"); + static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"}; + Fn->addFnAttr("sign-return-address", SignReturnAddrStr[static_cast(BPI.SignReturnAddr)]); + if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) { Fn->addFnAttr("sign-return-address-key", - Key == LangOptions::SignReturnAddressKeyKind::AKey + BPI.SignKey == LangOptions::SignReturnAddressKeyKind::AKey ? "a_key" : "b_key"); } - if (BranchTargetEnforcement) - Fn->addFnAttr("branch-target-enforcement"); + Fn->addFnAttr("branch-target-enforcement", + BPI.BranchTargetEnforcement ? 
"true" : "false"); } }; class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo { public: WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind K) : AArch64TargetCodeGenInfo(CGT, K) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib); } void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, llvm::SmallString<32> &Opt) const override { Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; } }; void WindowsAArch64TargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { AArch64TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); if (GV->isDeclaration()) return; addStackProbeTargetAttributes(D, GV, CGM); } } ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty) const { assert(Ty->isVectorType() && "expected vector type!"); const auto *VT = Ty->castAs(); if (VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) { assert(VT->getElementType()->isBuiltinType() && "expected builtin type!"); assert(VT->getElementType()->castAs()->getKind() == BuiltinType::UChar && "unexpected builtin type for SVE predicate!"); return ABIArgInfo::getDirect(llvm::ScalableVectorType::get( llvm::Type::getInt1Ty(getVMContext()), 16)); } if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector) { assert(VT->getElementType()->isBuiltinType() && "expected builtin type!"); const auto *BT = VT->getElementType()->castAs(); llvm::ScalableVectorType *ResType = nullptr; switch (BT->getKind()) { default: llvm_unreachable("unexpected builtin type for SVE vector!"); case BuiltinType::SChar: case BuiltinType::UChar: ResType = llvm::ScalableVectorType::get( llvm::Type::getInt8Ty(getVMContext()), 16); break; case BuiltinType::Short: case BuiltinType::UShort: ResType = llvm::ScalableVectorType::get( llvm::Type::getInt16Ty(getVMContext()), 8); break; case BuiltinType::Int: case BuiltinType::UInt: ResType = llvm::ScalableVectorType::get( llvm::Type::getInt32Ty(getVMContext()), 4); break; case BuiltinType::Long: case BuiltinType::ULong: ResType = llvm::ScalableVectorType::get( llvm::Type::getInt64Ty(getVMContext()), 2); break; case BuiltinType::Half: ResType = llvm::ScalableVectorType::get( llvm::Type::getHalfTy(getVMContext()), 8); break; case BuiltinType::Float: ResType = llvm::ScalableVectorType::get( llvm::Type::getFloatTy(getVMContext()), 4); break; case BuiltinType::Double: ResType = llvm::ScalableVectorType::get( llvm::Type::getDoubleTy(getVMContext()), 2); break; case BuiltinType::BFloat16: ResType = llvm::ScalableVectorType::get( llvm::Type::getBFloatTy(getVMContext()), 8); break; } return ABIArgInfo::getDirect(ResType); } uint64_t Size = getContext().getTypeSize(Ty); // Android promotes <2 x i8> to i16, not i32 if (isAndroid() && (Size <= 16)) { llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); } if (Size <= 32) { llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); } if (Size == 64) { auto *ResType = llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2); return ABIArgInfo::getDirect(ResType); } if (Size == 128) { auto *ResType = llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4); return ABIArgInfo::getDirect(ResType); } return 
getNaturalAlignIndirect(Ty, /*ByVal=*/false); } ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { Ty = useFirstFieldIfTransparentUnion(Ty); // Handle illegal vector types here. if (isIllegalVectorType(Ty)) return coerceIllegalVector(Ty); if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); if (const auto *EIT = Ty->getAs()) if (EIT->getNumBits() > 128) return getNaturalAlignIndirect(Ty); return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS() ? ABIArgInfo::getExtend(Ty) : ABIArgInfo::getDirect()); } // Structures with either a non-trivial destructor or a non-trivial // copy constructor are always indirect. if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == CGCXXABI::RAA_DirectInMemory); } // Empty records are always ignored on Darwin, but actually passed in C++ mode // elsewhere for GNU compatibility. uint64_t Size = getContext().getTypeSize(Ty); bool IsEmpty = isEmptyRecord(getContext(), Ty, true); if (IsEmpty || Size == 0) { if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS()) return ABIArgInfo::getIgnore(); // GNU C mode. The only argument that gets ignored is an empty one with size // 0. if (IsEmpty && Size == 0) return ABIArgInfo::getIgnore(); return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); } // Homogeneous Floating-point Aggregates (HFAs) need to be expanded. const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, Members)) { return ABIArgInfo::getDirect( llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members)); } // Aggregates <= 16 bytes are passed directly in registers or on the stack. if (Size <= 128) { // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of // same size and alignment. if (getTarget().isRenderScriptTarget()) { return coerceToIntArray(Ty, getContext(), getVMContext()); } unsigned Alignment; if (Kind == AArch64ABIInfo::AAPCS) { Alignment = getContext().getTypeUnadjustedAlign(Ty); Alignment = Alignment < 128 ? 64 : 128; } else { Alignment = std::max(getContext().getTypeAlign(Ty), (unsigned)getTarget().getPointerWidth(0)); } Size = llvm::alignTo(Size, Alignment); // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. llvm::Type *BaseTy = llvm::Type::getIntNTy(getVMContext(), Alignment); return ABIArgInfo::getDirect( Size == Alignment ? BaseTy : llvm::ArrayType::get(BaseTy, Size / Alignment)); } return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy, bool IsVariadic) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); if (const auto *VT = RetTy->getAs()) { if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector || VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) return coerceIllegalVector(RetTy); } // Large vector types should be returned via memory. if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) return getNaturalAlignIndirect(RetTy); if (!isAggregateTypeForABI(RetTy)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); if (const auto *EIT = RetTy->getAs()) if (EIT->getNumBits() > 128) return getNaturalAlignIndirect(RetTy); return (isPromotableIntegerTypeForABI(RetTy) && isDarwinPCS() ? 
ABIArgInfo::getExtend(RetTy) : ABIArgInfo::getDirect()); } uint64_t Size = getContext().getTypeSize(RetTy); if (isEmptyRecord(getContext(), RetTy, true) || Size == 0) return ABIArgInfo::getIgnore(); const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(RetTy, Base, Members) && !(getTarget().getTriple().getArch() == llvm::Triple::aarch64_32 && IsVariadic)) // Homogeneous Floating-point Aggregates (HFAs) are returned directly. return ABIArgInfo::getDirect(); // Aggregates <= 16 bytes are returned directly in registers or on the stack. if (Size <= 128) { // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of // same size and alignment. if (getTarget().isRenderScriptTarget()) { return coerceToIntArray(RetTy, getContext(), getVMContext()); } unsigned Alignment = getContext().getTypeAlign(RetTy); Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes // We use a pair of i64 for 16-byte aggregate with 8-byte alignment. // For aggregates with 16-byte alignment, we use i128. if (Alignment < 128 && Size == 128) { llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext()); return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64)); } return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size)); } return getNaturalAlignIndirect(RetTy); } /// isIllegalVectorType - check whether the vector type is legal for AArch64. bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { if (const VectorType *VT = Ty->getAs()) { // Check whether VT is a fixed-length SVE vector. These types are // represented as scalable vectors in function args/return and must be // coerced from fixed vectors. if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector || VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) return true; // Check whether VT is legal. unsigned NumElements = VT->getNumElements(); uint64_t Size = getContext().getTypeSize(VT); // NumElements should be power of 2. if (!llvm::isPowerOf2_32(NumElements)) return true; // arm64_32 has to be compatible with the ARM logic here, which allows huge // vectors for some reason. llvm::Triple Triple = getTarget().getTriple(); if (Triple.getArch() == llvm::Triple::aarch64_32 && Triple.isOSBinFormatMachO()) return Size <= 32; return Size != 64 && (Size != 128 || NumElements == 1); } return false; } bool AArch64ABIInfo::isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy, unsigned elts) const { if (!llvm::isPowerOf2_32(elts)) return false; if (totalSize.getQuantity() != 8 && (totalSize.getQuantity() != 16 || elts == 1)) return false; return true; } bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS64 must have base types of a floating // point type or a short-vector type. This is the same as the 32-bit ABI, // but with the difference that any floating-point type is allowed, // including __fp16. 
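  // Illustrative example (assumed type, not from this change): a struct such
  // as
  //
  //   struct Quad { float x, y, z, w; };
  //
  // has a floating-point base type and no more than four members, so together
  // with isHomogeneousAggregateSmallEnough() below it is classified as a
  // homogeneous aggregate and passed/returned via the SIMD&FP registers
  // rather than being treated as a generic aggregate.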
if (const BuiltinType *BT = Ty->getAs()) { if (BT->isFloatingPoint()) return true; } else if (const VectorType *VT = Ty->getAs()) { unsigned VecSize = getContext().getTypeSize(VT); if (VecSize == 64 || VecSize == 128) return true; } return false; } bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, uint64_t Members) const { return Members <= 4; } Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF) const { ABIArgInfo AI = classifyArgumentType(Ty); bool IsIndirect = AI.isIndirect(); llvm::Type *BaseTy = CGF.ConvertType(Ty); if (IsIndirect) BaseTy = llvm::PointerType::getUnqual(BaseTy); else if (AI.getCoerceToType()) BaseTy = AI.getCoerceToType(); unsigned NumRegs = 1; if (llvm::ArrayType *ArrTy = dyn_cast(BaseTy)) { BaseTy = ArrTy->getElementType(); NumRegs = ArrTy->getNumElements(); } bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy(); // The AArch64 va_list type and handling is specified in the Procedure Call // Standard, section B.4: // // struct { // void *__stack; // void *__gr_top; // void *__vr_top; // int __gr_offs; // int __vr_offs; // }; llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg"); llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack"); llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); CharUnits TySize = getContext().getTypeSizeInChars(Ty); CharUnits TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty); Address reg_offs_p = Address::invalid(); llvm::Value *reg_offs = nullptr; int reg_top_index; int RegSize = IsIndirect ? 8 : TySize.getQuantity(); if (!IsFPR) { // 3 is the field number of __gr_offs reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p"); reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs"); reg_top_index = 1; // field number for __gr_top RegSize = llvm::alignTo(RegSize, 8); } else { // 4 is the field number of __vr_offs. reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p"); reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs"); reg_top_index = 2; // field number for __vr_top RegSize = 16 * NumRegs; } //======================================= // Find out where argument was passed //======================================= // If reg_offs >= 0 we're already using the stack for this type of // argument. We don't want to keep updating reg_offs (in case it overflows, // though anyone passing 2GB of arguments, each at most 16 bytes, deserves // whatever they get). llvm::Value *UsingStack = nullptr; UsingStack = CGF.Builder.CreateICmpSGE( reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, 0)); CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock); // Otherwise, at least some kind of argument could go in these registers, the // question is whether this particular type is too big. CGF.EmitBlock(MaybeRegBlock); // Integer arguments may need to correct register alignment (for example a // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we // align __gr_offs to calculate the potential address. if (!IsFPR && !IsIndirect && TyAlign.getQuantity() > 8) { int Align = TyAlign.getQuantity(); reg_offs = CGF.Builder.CreateAdd( reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, Align - 1), "align_regoffs"); reg_offs = CGF.Builder.CreateAnd( reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, -Align), "aligned_regoffs"); } // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list. 
// The fact that this is done unconditionally reflects the fact that // allocating an argument to the stack also uses up all the remaining // registers of the appropriate kind. llvm::Value *NewOffset = nullptr; NewOffset = CGF.Builder.CreateAdd( reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize), "new_reg_offs"); CGF.Builder.CreateStore(NewOffset, reg_offs_p); // Now we're in a position to decide whether this argument really was in // registers or not. llvm::Value *InRegs = nullptr; InRegs = CGF.Builder.CreateICmpSLE( NewOffset, llvm::ConstantInt::get(CGF.Int32Ty, 0), "inreg"); CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock); //======================================= // Argument was in registers //======================================= // Now we emit the code for if the argument was originally passed in // registers. First start the appropriate block: CGF.EmitBlock(InRegBlock); llvm::Value *reg_top = nullptr; Address reg_top_p = CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p"); reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top"); Address BaseAddr(CGF.Builder.CreateInBoundsGEP(reg_top, reg_offs), CharUnits::fromQuantity(IsFPR ? 16 : 8)); Address RegAddr = Address::invalid(); llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty); if (IsIndirect) { // If it's been passed indirectly (actually a struct), whatever we find from // stored registers or on the stack will actually be a struct **. MemTy = llvm::PointerType::getUnqual(MemTy); } const Type *Base = nullptr; uint64_t NumMembers = 0; bool IsHFA = isHomogeneousAggregate(Ty, Base, NumMembers); if (IsHFA && NumMembers > 1) { // Homogeneous aggregates passed in registers will have their elements split // and stored 16-bytes apart regardless of size (they're notionally in qN, // qN+1, ...). We reload and store into a temporary local variable // contiguously. assert(!IsIndirect && "Homogeneous aggregates should be passed directly"); auto BaseTyInfo = getContext().getTypeInfoInChars(QualType(Base, 0)); llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0)); llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers); Address Tmp = CGF.CreateTempAlloca(HFATy, std::max(TyAlign, BaseTyInfo.second)); // On big-endian platforms, the value will be right-aligned in its slot. int Offset = 0; if (CGF.CGM.getDataLayout().isBigEndian() && BaseTyInfo.first.getQuantity() < 16) Offset = 16 - BaseTyInfo.first.getQuantity(); for (unsigned i = 0; i < NumMembers; ++i) { CharUnits BaseOffset = CharUnits::fromQuantity(16 * i + Offset); Address LoadAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, BaseOffset); LoadAddr = CGF.Builder.CreateElementBitCast(LoadAddr, BaseTy); Address StoreAddr = CGF.Builder.CreateConstArrayGEP(Tmp, i); llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr); CGF.Builder.CreateStore(Elem, StoreAddr); } RegAddr = CGF.Builder.CreateElementBitCast(Tmp, MemTy); } else { // Otherwise the object is contiguous in memory. // It might be right-aligned in its slot. 
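    // For example, on a big-endian target a 4-byte int stored from an 8-byte
    // GPR occupies the upper half of its slot (offsets 4..7), so the code
    // below advances BaseAddr by SlotSize - TySize before casting and loading.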
CharUnits SlotSize = BaseAddr.getAlignment(); if (CGF.CGM.getDataLayout().isBigEndian() && !IsIndirect && (IsHFA || !isAggregateTypeForABI(Ty)) && TySize < SlotSize) { CharUnits Offset = SlotSize - TySize; BaseAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, Offset); } RegAddr = CGF.Builder.CreateElementBitCast(BaseAddr, MemTy); } CGF.EmitBranch(ContBlock); //======================================= // Argument was on the stack //======================================= CGF.EmitBlock(OnStackBlock); Address stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p"); llvm::Value *OnStackPtr = CGF.Builder.CreateLoad(stack_p, "stack"); // Again, stack arguments may need realignment. In this case both integer and // floating-point ones might be affected. if (!IsIndirect && TyAlign.getQuantity() > 8) { int Align = TyAlign.getQuantity(); OnStackPtr = CGF.Builder.CreatePtrToInt(OnStackPtr, CGF.Int64Ty); OnStackPtr = CGF.Builder.CreateAdd( OnStackPtr, llvm::ConstantInt::get(CGF.Int64Ty, Align - 1), "align_stack"); OnStackPtr = CGF.Builder.CreateAnd( OnStackPtr, llvm::ConstantInt::get(CGF.Int64Ty, -Align), "align_stack"); OnStackPtr = CGF.Builder.CreateIntToPtr(OnStackPtr, CGF.Int8PtrTy); } Address OnStackAddr(OnStackPtr, std::max(CharUnits::fromQuantity(8), TyAlign)); // All stack slots are multiples of 8 bytes. CharUnits StackSlotSize = CharUnits::fromQuantity(8); CharUnits StackSize; if (IsIndirect) StackSize = StackSlotSize; else StackSize = TySize.alignTo(StackSlotSize); llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize); llvm::Value *NewStack = CGF.Builder.CreateInBoundsGEP(OnStackPtr, StackSizeC, "new_stack"); // Write the new value of __stack for the next call to va_arg CGF.Builder.CreateStore(NewStack, stack_p); if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) && TySize < StackSlotSize) { CharUnits Offset = StackSlotSize - TySize; OnStackAddr = CGF.Builder.CreateConstInBoundsByteGEP(OnStackAddr, Offset); } OnStackAddr = CGF.Builder.CreateElementBitCast(OnStackAddr, MemTy); CGF.EmitBranch(ContBlock); //======================================= // Tidy up //======================================= CGF.EmitBlock(ContBlock); Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, OnStackAddr, OnStackBlock, "vaargs.addr"); if (IsIndirect) return Address(CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"), TyAlign); return ResAddr; } Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF) const { // The backend's lowering doesn't support va_arg for aggregates or // illegal vector types. Lower VAArg here for these cases and use // the LLVM va_arg instruction for everything else. if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty)) return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()); uint64_t PointerSize = getTarget().getPointerWidth(0) / 8; CharUnits SlotSize = CharUnits::fromQuantity(PointerSize); // Empty records are ignored for parameter passing purposes. if (isEmptyRecord(getContext(), Ty, true)) { Address Addr(CGF.Builder.CreateLoad(VAListAddr, "ap.cur"), SlotSize); Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); return Addr; } // The size of the actual thing passed, which might end up just // being a pointer for indirect types. auto TyInfo = getContext().getTypeInfoInChars(Ty); // Arguments bigger than 16 bytes which aren't homogeneous // aggregates should be passed indirectly. 
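  // Illustrative examples (assumed types): a 32-byte struct of four doubles
  // is a homogeneous aggregate and stays direct, while a 32-byte struct of
  // eight ints is not, so it is passed as a pointer and the va_arg lowering
  // below loads through that pointer.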
bool IsIndirect = false; if (TyInfo.first.getQuantity() > 16) { const Type *Base = nullptr; uint64_t Members = 0; IsIndirect = !isHomogeneousAggregate(Ty, Base, Members); } return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo, SlotSize, /*AllowHigherAlign*/ true); } Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, CGF.getContext().getTypeInfoInChars(Ty), CharUnits::fromQuantity(8), /*allowHigherAlign*/ false); } //===----------------------------------------------------------------------===// // ARM ABI Implementation //===----------------------------------------------------------------------===// namespace { class ARMABIInfo : public SwiftABIInfo { public: enum ABIKind { APCS = 0, AAPCS = 1, AAPCS_VFP = 2, AAPCS16_VFP = 3, }; private: ABIKind Kind; bool IsFloatABISoftFP; public: ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : SwiftABIInfo(CGT), Kind(_Kind) { setCCs(); IsFloatABISoftFP = CGT.getCodeGenOpts().FloatABI == "softfp" || CGT.getCodeGenOpts().FloatABI == ""; // default } bool isEABI() const { switch (getTarget().getTriple().getEnvironment()) { case llvm::Triple::Android: case llvm::Triple::EABI: case llvm::Triple::EABIHF: case llvm::Triple::GNUEABI: case llvm::Triple::GNUEABIHF: case llvm::Triple::MuslEABI: case llvm::Triple::MuslEABIHF: return true; default: return false; } } bool isEABIHF() const { switch (getTarget().getTriple().getEnvironment()) { case llvm::Triple::EABIHF: case llvm::Triple::GNUEABIHF: case llvm::Triple::MuslEABIHF: return true; default: return false; } } ABIKind getABIKind() const { return Kind; } bool allowBFloatArgsAndRet() const override { return !IsFloatABISoftFP && getTarget().hasBFloat16Type(); } private: ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic, unsigned functionCallConv) const; ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic, unsigned functionCallConv) const; ABIArgInfo classifyHomogeneousAggregate(QualType Ty, const Type *Base, uint64_t Members) const; ABIArgInfo coerceIllegalVector(QualType Ty) const; bool isIllegalVectorType(QualType Ty) const; bool containsAnyFP16Vectors(QualType Ty) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t Members) const override; bool isEffectivelyAAPCS_VFP(unsigned callConvention, bool acceptHalf) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; llvm::CallingConv::ID getLLVMDefaultCC() const; llvm::CallingConv::ID getABIDefaultCC() const; void setCCs(); bool shouldPassIndirectlyForSwift(ArrayRef scalars, bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } bool isSwiftErrorInRegister() const override { return true; } bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy, unsigned elts) const override; }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { public: ARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K) : TargetCodeGenInfo(std::make_unique(CGT, K)) {} const ARMABIInfo &getABIInfo() const { return static_cast(TargetCodeGenInfo::getABIInfo()); } int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 13; } StringRef getARCRetainAutoreleasedReturnValueMarker() const override { return "mov\tr7, r7\t\t// marker for objc_retainAutoreleaseReturnValue"; } bool 
initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override { llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4); // 0-15 are the 16 integer registers. AssignToArrayRange(CGF.Builder, Address, Four8, 0, 15); return false; } unsigned getSizeOfUnwindException() const override { if (getABIInfo().isEABI()) return 88; return TargetCodeGenInfo::getSizeOfUnwindException(); } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { if (GV->isDeclaration()) return; const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; const ARMInterruptAttr *Attr = FD->getAttr(); if (!Attr) return; const char *Kind; switch (Attr->getInterrupt()) { case ARMInterruptAttr::Generic: Kind = ""; break; case ARMInterruptAttr::IRQ: Kind = "IRQ"; break; case ARMInterruptAttr::FIQ: Kind = "FIQ"; break; case ARMInterruptAttr::SWI: Kind = "SWI"; break; case ARMInterruptAttr::ABORT: Kind = "ABORT"; break; case ARMInterruptAttr::UNDEF: Kind = "UNDEF"; break; } llvm::Function *Fn = cast(GV); Fn->addFnAttr("interrupt", Kind); ARMABIInfo::ABIKind ABI = cast(getABIInfo()).getABIKind(); if (ABI == ARMABIInfo::APCS) return; // AAPCS guarantees that sp will be 8-byte aligned on any public interface, // however this is not necessarily true on taking any interrupt. Instruct // the backend to perform a realignment as part of the function prologue. llvm::AttrBuilder B; B.addStackAlignmentAttr(8); Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); } }; class WindowsARMTargetCodeGenInfo : public ARMTargetCodeGenInfo { public: WindowsARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K) : ARMTargetCodeGenInfo(CGT, K) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib); } void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, llvm::SmallString<32> &Opt) const override { Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; } }; void WindowsARMTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM); if (GV->isDeclaration()) return; addStackProbeTargetAttributes(D, GV, CGM); } } void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!::classifyReturnType(getCXXABI(), FI, *this)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic(), FI.getCallingConvention()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type, FI.isVariadic(), FI.getCallingConvention()); // Always honor user-specified calling convention. if (FI.getCallingConvention() != llvm::CallingConv::C) return; llvm::CallingConv::ID cc = getRuntimeCC(); if (cc != llvm::CallingConv::C) FI.setEffectiveCallingConvention(cc); } /// Return the default calling convention that LLVM will use. llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const { // The default calling convention that LLVM will infer. if (isEABIHF() || getTarget().getTriple().isWatchABI()) return llvm::CallingConv::ARM_AAPCS_VFP; else if (isEABI()) return llvm::CallingConv::ARM_AAPCS; else return llvm::CallingConv::ARM_APCS; } /// Return the calling convention that our ABI would like us to use /// as the C calling convention. 
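// Note: setCCs() further down records an explicit runtime calling convention
// only when this ABI default differs from what LLVM would already infer from
// the triple (getLLVMDefaultCC() above). For example, an *eabihf triple
// already implies ARM_AAPCS_VFP, so no extra annotation is emitted there.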
llvm::CallingConv::ID ARMABIInfo::getABIDefaultCC() const { switch (getABIKind()) { case APCS: return llvm::CallingConv::ARM_APCS; case AAPCS: return llvm::CallingConv::ARM_AAPCS; case AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; case AAPCS16_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; } llvm_unreachable("bad ABI kind"); } void ARMABIInfo::setCCs() { assert(getRuntimeCC() == llvm::CallingConv::C); // Don't muddy up the IR with a ton of explicit annotations if // they'd just match what LLVM will infer from the triple. llvm::CallingConv::ID abiCC = getABIDefaultCC(); if (abiCC != getLLVMDefaultCC()) RuntimeCC = abiCC; } ABIArgInfo ARMABIInfo::coerceIllegalVector(QualType Ty) const { uint64_t Size = getContext().getTypeSize(Ty); if (Size <= 32) { llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); } if (Size == 64 || Size == 128) { auto *ResType = llvm::FixedVectorType::get( llvm::Type::getInt32Ty(getVMContext()), Size / 32); return ABIArgInfo::getDirect(ResType); } return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty, const Type *Base, uint64_t Members) const { assert(Base && "Base class should be set for homogeneous aggregate"); // Base can be a floating-point or a vector. if (const VectorType *VT = Base->getAs()) { // FP16 vectors should be converted to integer vectors if (!getTarget().hasLegalHalfType() && containsAnyFP16Vectors(Ty)) { uint64_t Size = getContext().getTypeSize(VT); auto *NewVecTy = llvm::FixedVectorType::get( llvm::Type::getInt32Ty(getVMContext()), Size / 32); llvm::Type *Ty = llvm::ArrayType::get(NewVecTy, Members); return ABIArgInfo::getDirect(Ty, 0, nullptr, false); } } return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); } ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, unsigned functionCallConv) const { // 6.1.2.1 The following argument types are VFP CPRCs: // A single-precision floating-point type (including promoted // half-precision types); A double-precision floating-point type; // A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate // with a Base Type of a single- or double-precision floating-point type, // 64-bit containerized vectors or 128-bit containerized vectors with one // to four Elements. // Variadic functions should always marshal to the base standard. bool IsAAPCS_VFP = !isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ false); Ty = useFirstFieldIfTransparentUnion(Ty); // Handle illegal vector types here. if (isIllegalVectorType(Ty)) return coerceIllegalVector(Ty); if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) { Ty = EnumTy->getDecl()->getIntegerType(); } if (const auto *EIT = Ty->getAs()) if (EIT->getNumBits() > 64) return getNaturalAlignIndirect(Ty, /*ByVal=*/true); return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) : ABIArgInfo::getDirect()); } if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); } // Ignore empty records. if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); if (IsAAPCS_VFP) { // Homogeneous Aggregates need to be expanded when we can fit the aggregate // into VFP registers. 
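    // Illustrative example (assumed type): under AAPCS-VFP a homogeneous
    // aggregate such as
    //
    //   struct Vec4 { float x, y, z, w; };
    //
    // is a VFP CPRC (base type float, four members), so it is kept in its
    // aggregate form here and can be assigned to s0-s3 by the backend.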
const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, Members)) return classifyHomogeneousAggregate(Ty, Base, Members); } else if (getABIKind() == ARMABIInfo::AAPCS16_VFP) { // WatchOS does have homogeneous aggregates. Note that we intentionally use // this convention even for a variadic function: the backend will use GPRs // if needed. const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, Members)) { assert(Base && Members <= 4 && "unexpected homogeneous aggregate"); llvm::Type *Ty = llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members); return ABIArgInfo::getDirect(Ty, 0, nullptr, false); } } if (getABIKind() == ARMABIInfo::AAPCS16_VFP && getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(16)) { // WatchOS is adopting the 64-bit AAPCS rule on composite types: if they're // bigger than 128-bits, they get placed in space allocated by the caller, // and a pointer is passed. return ABIArgInfo::getIndirect( CharUnits::fromQuantity(getContext().getTypeAlign(Ty) / 8), false); } // Support byval for ARM. // The ABI alignment for APCS is 4-byte and for AAPCS at least 4-byte and at // most 8-byte. We realign the indirect argument if type alignment is bigger // than ABI alignment. uint64_t ABIAlign = 4; uint64_t TyAlign; if (getABIKind() == ARMABIInfo::AAPCS_VFP || getABIKind() == ARMABIInfo::AAPCS) { TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity(); ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8); } else { TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity(); } if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) { assert(getABIKind() != ARMABIInfo::AAPCS16_VFP && "unexpected byval"); return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign), /*ByVal=*/true, /*Realign=*/TyAlign > ABIAlign); } // On RenderScript, coerce Aggregates <= 64 bytes to an integer array of // same size and alignment. if (getTarget().isRenderScriptTarget()) { return coerceToIntArray(Ty, getContext(), getVMContext()); } // Otherwise, pass by coercing to a structure of the appropriate size. llvm::Type* ElemTy; unsigned SizeRegs; // FIXME: Try to match the types of the arguments more accurately where // we can. if (TyAlign <= 4) { ElemTy = llvm::Type::getInt32Ty(getVMContext()); SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32; } else { ElemTy = llvm::Type::getInt64Ty(getVMContext()); SizeRegs = (getContext().getTypeSize(Ty) + 63) / 64; } return ABIArgInfo::getDirect(llvm::ArrayType::get(ElemTy, SizeRegs)); } static bool isIntegerLikeType(QualType Ty, ASTContext &Context, llvm::LLVMContext &VMContext) { // APCS, C Language Calling Conventions, Non-Simple Return Values: A structure // is called integer-like if its size is less than or equal to one word, and // the offset of each of its addressable sub-fields is zero. uint64_t Size = Context.getTypeSize(Ty); // Check that the type fits in a word. if (Size > 32) return false; // FIXME: Handle vector types! if (Ty->isVectorType()) return false; // Float types are never treated as "integer like". if (Ty->isRealFloatingType()) return false; // If this is a builtin or pointer type then it is ok. if (Ty->getAs() || Ty->isPointerType()) return true; // Small complex integer types are "integer like". if (const ComplexType *CT = Ty->getAs()) return isIntegerLikeType(CT->getElementType(), Context, VMContext); // Single element and zero sized arrays should be allowed, by the definition // above, but they are not. 
// Otherwise, it must be a record type. const RecordType *RT = Ty->getAs(); if (!RT) return false; // Ignore records with flexible arrays. const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return false; // Check that all sub-fields are at offset 0, and are themselves "integer // like". const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); bool HadField = false; unsigned idx = 0; for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { const FieldDecl *FD = *i; // Bit-fields are not addressable, we only need to verify they are "integer // like". We still have to disallow a subsequent non-bitfield, for example: // struct { int : 0; int x } // is non-integer like according to gcc. if (FD->isBitField()) { if (!RD->isUnion()) HadField = true; if (!isIntegerLikeType(FD->getType(), Context, VMContext)) return false; continue; } // Check if this field is at offset 0. if (Layout.getFieldOffset(idx) != 0) return false; if (!isIntegerLikeType(FD->getType(), Context, VMContext)) return false; // Only allow at most one field in a structure. This doesn't match the // wording above, but follows gcc in situations with a field following an // empty structure. if (!RD->isUnion()) { if (HadField) return false; HadField = true; } } return true; } ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic, unsigned functionCallConv) const { // Variadic functions should always marshal to the base standard. bool IsAAPCS_VFP = !isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ true); if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); if (const VectorType *VT = RetTy->getAs()) { // Large vector types should be returned via memory. if (getContext().getTypeSize(RetTy) > 128) return getNaturalAlignIndirect(RetTy); // TODO: FP16/BF16 vectors should be converted to integer vectors // This check is similar to isIllegalVectorType - refactor? if ((!getTarget().hasLegalHalfType() && (VT->getElementType()->isFloat16Type() || VT->getElementType()->isHalfType())) || (IsFloatABISoftFP && VT->getElementType()->isBFloat16Type())) return coerceIllegalVector(RetTy); } if (!isAggregateTypeForABI(RetTy)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); if (const auto *EIT = RetTy->getAs()) if (EIT->getNumBits() > 64) return getNaturalAlignIndirect(RetTy, /*ByVal=*/false); return isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) : ABIArgInfo::getDirect(); } // Are we following APCS? if (getABIKind() == APCS) { if (isEmptyRecord(getContext(), RetTy, false)) return ABIArgInfo::getIgnore(); // Complex types are all returned as packed integers. // // FIXME: Consider using 2 x vector types if the back end handles them // correctly. if (RetTy->isAnyComplexType()) return ABIArgInfo::getDirect(llvm::IntegerType::get( getVMContext(), getContext().getTypeSize(RetTy))); // Integer like structures are returned in r0. if (isIntegerLikeType(RetTy, getContext(), getVMContext())) { // Return in the smallest viable integer type. uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 8) return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); if (Size <= 16) return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); } // Otherwise return in memory. return getNaturalAlignIndirect(RetTy); } // Otherwise this is an AAPCS variant. 
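  // Illustrative examples (assumed types): struct { char a, b, c; } is 24
  // bits, so the code below returns it directly in r0 (widened to an i32;
  // always i32 on big-endian), whereas a 12-byte non-homogeneous struct is
  // returned indirectly through memory, except under AAPCS16_VFP, which packs
  // composites up to 16 bytes into an i32 array.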
if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); // Check for homogeneous aggregates with AAPCS-VFP. if (IsAAPCS_VFP) { const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(RetTy, Base, Members)) return classifyHomogeneousAggregate(RetTy, Base, Members); } // Aggregates <= 4 bytes are returned in r0; other aggregates // are returned indirectly. uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 32) { // On RenderScript, coerce Aggregates <= 4 bytes to an integer array of // same size and alignment. if (getTarget().isRenderScriptTarget()) { return coerceToIntArray(RetTy, getContext(), getVMContext()); } if (getDataLayout().isBigEndian()) // Return in 32 bit integer integer type (as if loaded by LDR, AAPCS 5.4) return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); // Return in the smallest viable integer type. if (Size <= 8) return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); if (Size <= 16) return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); } else if (Size <= 128 && getABIKind() == AAPCS16_VFP) { llvm::Type *Int32Ty = llvm::Type::getInt32Ty(getVMContext()); llvm::Type *CoerceTy = llvm::ArrayType::get(Int32Ty, llvm::alignTo(Size, 32) / 32); return ABIArgInfo::getDirect(CoerceTy); } return getNaturalAlignIndirect(RetTy); } /// isIllegalVector - check whether Ty is an illegal vector type. bool ARMABIInfo::isIllegalVectorType(QualType Ty) const { if (const VectorType *VT = Ty->getAs ()) { // On targets that don't support half, fp16 or bfloat, they are expanded // into float, and we don't want the ABI to depend on whether or not they // are supported in hardware. Thus return false to coerce vectors of these // types into integer vectors. // We do not depend on hasLegalHalfType for bfloat as it is a // separate IR type. if ((!getTarget().hasLegalHalfType() && (VT->getElementType()->isFloat16Type() || VT->getElementType()->isHalfType())) || (IsFloatABISoftFP && VT->getElementType()->isBFloat16Type())) return true; if (isAndroid()) { // Android shipped using Clang 3.1, which supported a slightly different // vector ABI. The primary differences were that 3-element vector types // were legal, and so were sub 32-bit vectors (i.e. <2 x i8>). This path // accepts that legacy behavior for Android only. // Check whether VT is legal. unsigned NumElements = VT->getNumElements(); // NumElements should be power of 2 or equal to 3. if (!llvm::isPowerOf2_32(NumElements) && NumElements != 3) return true; } else { // Check whether VT is legal. unsigned NumElements = VT->getNumElements(); uint64_t Size = getContext().getTypeSize(VT); // NumElements should be power of 2. if (!llvm::isPowerOf2_32(NumElements)) return true; // Size should be greater than 32 bits. return Size <= 32; } } return false; } /// Return true if a type contains any 16-bit floating point vectors bool ARMABIInfo::containsAnyFP16Vectors(QualType Ty) const { if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { uint64_t NElements = AT->getSize().getZExtValue(); if (NElements == 0) return false; return containsAnyFP16Vectors(AT->getElementType()); } else if (const RecordType *RT = Ty->getAs()) { const RecordDecl *RD = RT->getDecl(); // If this is a C++ record, check the bases first. 
if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) if (llvm::any_of(CXXRD->bases(), [this](const CXXBaseSpecifier &B) { return containsAnyFP16Vectors(B.getType()); })) return true; if (llvm::any_of(RD->fields(), [this](FieldDecl *FD) { return FD && containsAnyFP16Vectors(FD->getType()); })) return true; return false; } else { if (const VectorType *VT = Ty->getAs()) return (VT->getElementType()->isFloat16Type() || VT->getElementType()->isBFloat16Type() || VT->getElementType()->isHalfType()); return false; } } bool ARMABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize, llvm::Type *eltTy, unsigned numElts) const { if (!llvm::isPowerOf2_32(numElts)) return false; unsigned size = getDataLayout().getTypeStoreSizeInBits(eltTy); if (size > 64) return false; if (vectorSize.getQuantity() != 8 && (vectorSize.getQuantity() != 16 || numElts == 1)) return false; return true; } bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS-VFP must have base types of float, // double, or 64-bit or 128-bit vectors. if (const BuiltinType *BT = Ty->getAs()) { if (BT->getKind() == BuiltinType::Float || BT->getKind() == BuiltinType::Double || BT->getKind() == BuiltinType::LongDouble) return true; } else if (const VectorType *VT = Ty->getAs()) { unsigned VecSize = getContext().getTypeSize(VT); if (VecSize == 64 || VecSize == 128) return true; } return false; } bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, uint64_t Members) const { return Members <= 4; } bool ARMABIInfo::isEffectivelyAAPCS_VFP(unsigned callConvention, bool acceptHalf) const { // Give precedence to user-specified calling conventions. if (callConvention != llvm::CallingConv::C) return (callConvention == llvm::CallingConv::ARM_AAPCS_VFP); else return (getABIKind() == AAPCS_VFP) || (acceptHalf && (getABIKind() == AAPCS16_VFP)); } Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { CharUnits SlotSize = CharUnits::fromQuantity(4); // Empty records are ignored for parameter passing purposes. if (isEmptyRecord(getContext(), Ty, true)) { Address Addr(CGF.Builder.CreateLoad(VAListAddr), SlotSize); Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); return Addr; } CharUnits TySize = getContext().getTypeSizeInChars(Ty); CharUnits TyAlignForABI = getContext().getTypeUnadjustedAlignInChars(Ty); // Use indirect if size of the illegal vector is bigger than 16 bytes. bool IsIndirect = false; const Type *Base = nullptr; uint64_t Members = 0; if (TySize > CharUnits::fromQuantity(16) && isIllegalVectorType(Ty)) { IsIndirect = true; // ARMv7k passes structs bigger than 16 bytes indirectly, in space // allocated by the caller. } else if (TySize > CharUnits::fromQuantity(16) && getABIKind() == ARMABIInfo::AAPCS16_VFP && !isHomogeneousAggregate(Ty, Base, Members)) { IsIndirect = true; // Otherwise, bound the type's ABI alignment. // The ABI alignment for 64-bit or 128-bit vectors is 8 for AAPCS and 4 for // APCS. For AAPCS, the ABI alignment is at least 4-byte and at most 8-byte. // Our callers should be prepared to handle an under-aligned address. } else if (getABIKind() == ARMABIInfo::AAPCS_VFP || getABIKind() == ARMABIInfo::AAPCS) { TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4)); TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(8)); } else if (getABIKind() == ARMABIInfo::AAPCS16_VFP) { // ARMv7k allows type alignment up to 16 bytes. 
TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4)); TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(16)); } else { TyAlignForABI = CharUnits::fromQuantity(4); } std::pair TyInfo = { TySize, TyAlignForABI }; return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo, SlotSize, /*AllowHigherAlign*/ true); } //===----------------------------------------------------------------------===// // NVPTX ABI Implementation //===----------------------------------------------------------------------===// namespace { class NVPTXTargetCodeGenInfo; class NVPTXABIInfo : public ABIInfo { NVPTXTargetCodeGenInfo &CGInfo; public: NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info) : ABIInfo(CGT), CGInfo(Info) {} ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; bool isUnsupportedType(QualType T) const; ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const; }; class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { public: NVPTXTargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(std::make_unique(CGT, *this)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; bool shouldEmitStaticExternCAliases() const override; llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override { // On the device side, surface reference is represented as an object handle // in 64-bit integer. return llvm::Type::getInt64Ty(getABIInfo().getVMContext()); } llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override { // On the device side, texture reference is represented as an object handle // in 64-bit integer. return llvm::Type::getInt64Ty(getABIInfo().getVMContext()); } bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst, LValue Src) const override { emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src); return true; } bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst, LValue Src) const override { emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src); return true; } private: // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the // resulting MDNode to the nvvm.annotations MDNode. static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, int Operand); static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst, LValue Src) { llvm::Value *Handle = nullptr; llvm::Constant *C = llvm::dyn_cast(Src.getAddress(CGF).getPointer()); // Lookup `addrspacecast` through the constant pointer if any. if (auto *ASC = llvm::dyn_cast_or_null(C)) C = llvm::cast(ASC->getPointerOperand()); if (auto *GV = llvm::dyn_cast_or_null(C)) { // Load the handle from the specific global variable using // `nvvm.texsurf.handle.internal` intrinsic. Handle = CGF.EmitRuntimeCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal, {GV->getType()}), {GV}, "texsurf_handle"); } else Handle = CGF.EmitLoadOfScalar(Src, SourceLocation()); CGF.EmitStoreOfScalar(Handle, Dst); } }; /// Checks if the type is unsupported directly by the current target. 
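// For example, _Float16 on a target without hasFloat16Type(), __float128 (or
// any 128-bit real floating type) without hasFloat128Type(), an over-wide
// _ExtInt, and any aggregate containing one of these all count as unsupported.
// Under OpenMP device compilation, classifyReturnType() below then coerces
// such return values to an array of at-most-64-bit integers via
// coerceToIntArrayWithLimit() rather than returning them directly.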
bool NVPTXABIInfo::isUnsupportedType(QualType T) const { ASTContext &Context = getContext(); if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type()) return true; if (!Context.getTargetInfo().hasFloat128Type() && (T->isFloat128Type() || (T->isRealFloatingType() && Context.getTypeSize(T) == 128))) return true; if (const auto *EIT = T->getAs()) return EIT->getNumBits() > (Context.getTargetInfo().hasInt128Type() ? 128U : 64U); if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() && Context.getTypeSize(T) > 64U) return true; if (const auto *AT = T->getAsArrayTypeUnsafe()) return isUnsupportedType(AT->getElementType()); const auto *RT = T->getAs(); if (!RT) return false; const RecordDecl *RD = RT->getDecl(); // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) for (const CXXBaseSpecifier &I : CXXRD->bases()) if (isUnsupportedType(I.getType())) return true; for (const FieldDecl *I : RD->fields()) if (isUnsupportedType(I->getType())) return true; return false; } /// Coerce the given type into an array with maximum allowed size of elements. ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const { // Alignment and Size are measured in bits. const uint64_t Size = getContext().getTypeSize(Ty); const uint64_t Alignment = getContext().getTypeAlign(Ty); const unsigned Div = std::min(MaxSize, Alignment); llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div); const uint64_t NumElements = (Size + Div - 1) / Div; return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements)); } ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); if (getContext().getLangOpts().OpenMP && getContext().getLangOpts().OpenMPIsDevice && isUnsupportedType(RetTy)) return coerceToIntArrayWithLimit(RetTy, 64); // note: this is different from default ABI if (!RetTy->isScalarType()) return ABIArgInfo::getDirect(); // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) : ABIArgInfo::getDirect()); } ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); // Return aggregates type as indirect by value if (isAggregateTypeForABI(Ty)) { // Under CUDA device compilation, tex/surf builtin types are replaced with // object types and passed directly. if (getContext().getLangOpts().CUDAIsDevice) { if (Ty->isCUDADeviceBuiltinSurfaceType()) return ABIArgInfo::getDirect( CGInfo.getCUDADeviceBuiltinSurfaceDeviceType()); if (Ty->isCUDADeviceBuiltinTextureType()) return ABIArgInfo::getDirect( CGInfo.getCUDADeviceBuiltinTextureDeviceType()); } return getNaturalAlignIndirect(Ty, /* byval */ true); } if (const auto *EIT = Ty->getAs()) { if ((EIT->getNumBits() > 128) || (!getContext().getTargetInfo().hasInt128Type() && EIT->getNumBits() > 64)) return getNaturalAlignIndirect(Ty, /* byval */ true); } return (isPromotableIntegerTypeForABI(Ty) ? 
ABIArgInfo::getExtend(Ty) : ABIArgInfo::getDirect()); } void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type); // Always honor user-specified calling convention. if (FI.getCallingConvention() != llvm::CallingConv::C) return; FI.setEffectiveCallingConvention(getRuntimeCC()); } Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { llvm_unreachable("NVPTX does not support varargs"); } void NVPTXTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { if (GV->isDeclaration()) return; const VarDecl *VD = dyn_cast_or_null(D); if (VD) { if (M.getLangOpts().CUDA) { if (VD->getType()->isCUDADeviceBuiltinSurfaceType()) addNVVMMetadata(GV, "surface", 1); else if (VD->getType()->isCUDADeviceBuiltinTextureType()) addNVVMMetadata(GV, "texture", 1); return; } } const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; llvm::Function *F = cast(GV); // Perform special handling in OpenCL mode if (M.getLangOpts().OpenCL) { // Use OpenCL function attributes to check for kernel functions // By default, all functions are device functions if (FD->hasAttr()) { // OpenCL __kernel functions get kernel metadata // Create !{, metadata !"kernel", i32 1} node addNVVMMetadata(F, "kernel", 1); // And kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); } } // Perform special handling in CUDA mode. if (M.getLangOpts().CUDA) { // CUDA __global__ functions get a kernel metadata entry. Since // __global__ functions cannot be called from the device, we do not // need to set the noinline attribute. if (FD->hasAttr()) { // Create !{, metadata !"kernel", i32 1} node addNVVMMetadata(F, "kernel", 1); } if (CUDALaunchBoundsAttr *Attr = FD->getAttr()) { // Create !{, metadata !"maxntidx", i32 } node llvm::APSInt MaxThreads(32); MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext()); if (MaxThreads > 0) addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue()); // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was // not specified in __launch_bounds__ or if the user specified a 0 value, // we don't have to add a PTX directive. 
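      // Illustrative example (assumed kernel): __global__ void
      // __launch_bounds__(256, 2) kern(...) produces "maxntidx" = 256 from the
      // code above and, because a nonzero minimum is supplied, "minctasm" = 2
      // from the check below, both as operands of !nvvm.annotations.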
if (Attr->getMinBlocks()) { llvm::APSInt MinBlocks(32); MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext()); if (MinBlocks > 0) // Create !{, metadata !"minctasm", i32 } node addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue()); } } } } void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, int Operand) { llvm::Module *M = GV->getParent(); llvm::LLVMContext &Ctx = M->getContext(); // Get "nvvm.annotations" metadata node llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); llvm::Metadata *MDVals[] = { llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))}; // Append metadata to nvvm.annotations MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); } bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { return false; } } //===----------------------------------------------------------------------===// // SystemZ ABI Implementation //===----------------------------------------------------------------------===// namespace { class SystemZABIInfo : public SwiftABIInfo { bool HasVector; bool IsSoftFloatABI; public: SystemZABIInfo(CodeGenTypes &CGT, bool HV, bool SF) : SwiftABIInfo(CGT), HasVector(HV), IsSoftFloatABI(SF) {} bool isPromotableIntegerTypeForABI(QualType Ty) const; bool isCompoundType(QualType Ty) const; bool isVectorArgumentType(QualType Ty) const; bool isFPArgumentType(QualType Ty) const; QualType GetSingleElementType(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType ArgTy) const; void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type); } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; bool shouldPassIndirectlyForSwift(ArrayRef scalars, bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } bool isSwiftErrorInRegister() const override { return false; } }; class SystemZTargetCodeGenInfo : public TargetCodeGenInfo { public: SystemZTargetCodeGenInfo(CodeGenTypes &CGT, bool HasVector, bool SoftFloatABI) : TargetCodeGenInfo( std::make_unique(CGT, HasVector, SoftFloatABI)) {} }; } bool SystemZABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); // Promotable integer types are required to be promoted by the ABI. if (ABIInfo::isPromotableIntegerTypeForABI(Ty)) return true; if (const auto *EIT = Ty->getAs()) if (EIT->getNumBits() < 64) return true; // 32-bit values must also be promoted. 
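  // For example, a plain 'int' or 'unsigned' parameter is extended to 64 bits
  // (ABIArgInfo::getExtend) even though the C integer promotions would leave
  // it at 32 bits; _ExtInt types narrower than 64 bits are widened the same
  // way.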
if (const BuiltinType *BT = Ty->getAs()) switch (BT->getKind()) { case BuiltinType::Int: case BuiltinType::UInt: return true; default: return false; } return false; } bool SystemZABIInfo::isCompoundType(QualType Ty) const { return (Ty->isAnyComplexType() || Ty->isVectorType() || isAggregateTypeForABI(Ty)); } bool SystemZABIInfo::isVectorArgumentType(QualType Ty) const { return (HasVector && Ty->isVectorType() && getContext().getTypeSize(Ty) <= 128); } bool SystemZABIInfo::isFPArgumentType(QualType Ty) const { if (IsSoftFloatABI) return false; if (const BuiltinType *BT = Ty->getAs()) switch (BT->getKind()) { case BuiltinType::Float: case BuiltinType::Double: return true; default: return false; } return false; } QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const { const RecordType *RT = Ty->getAs(); if (RT && RT->isStructureOrClassType()) { const RecordDecl *RD = RT->getDecl(); QualType Found; // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) for (const auto &I : CXXRD->bases()) { QualType Base = I.getType(); // Empty bases don't affect things either way. if (isEmptyRecord(getContext(), Base, true)) continue; if (!Found.isNull()) return Ty; Found = GetSingleElementType(Base); } // Check the fields. for (const auto *FD : RD->fields()) { // For compatibility with GCC, ignore empty bitfields in C++ mode. // Unlike isSingleElementStruct(), empty structure and array fields // do count. So do anonymous bitfields that aren't zero-sized. if (getContext().getLangOpts().CPlusPlus && FD->isZeroLengthBitField(getContext())) continue; // Like isSingleElementStruct(), ignore C++20 empty data members. if (FD->hasAttr() && isEmptyRecord(getContext(), FD->getType(), true)) continue; // Unlike isSingleElementStruct(), arrays do not count. // Nested structures still do though. if (!Found.isNull()) return Ty; Found = GetSingleElementType(FD->getType()); } // Unlike isSingleElementStruct(), trailing padding is allowed. // An 8-byte aligned struct s { float f; } is passed as a double. if (!Found.isNull()) return Found; } return Ty; } Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { // Assume that va_list type is correct; should be pointer to LLVM type: // struct { // i64 __gpr; // i64 __fpr; // i8 *__overflow_arg_area; // i8 *__reg_save_area; // }; // Every non-vector argument occupies 8 bytes and is passed by preference // in either GPRs or FPRs. Vector arguments occupy 8 or 16 bytes and are // always passed on the stack. 
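  // A rough C-level sketch of that layout (illustrative only):
  //
  //   typedef struct {
  //     long __gpr;                 // GPR argument registers already used
  //     long __fpr;                 // FPR argument registers already used
  //     void *__overflow_arg_area;  // next stack argument
  //     void *__reg_save_area;      // register save area (r2 at +16, f0 at +128)
  //   } __va_list_tag;
  //
  // Each va_arg below either indexes the register save area (while __gpr < 5
  // or __fpr < 4) or bumps __overflow_arg_area by the 8- or 16-byte padded
  // slot size.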
Ty = getContext().getCanonicalType(Ty); auto TyInfo = getContext().getTypeInfoInChars(Ty); llvm::Type *ArgTy = CGF.ConvertTypeForMem(Ty); llvm::Type *DirectTy = ArgTy; ABIArgInfo AI = classifyArgumentType(Ty); bool IsIndirect = AI.isIndirect(); bool InFPRs = false; bool IsVector = false; CharUnits UnpaddedSize; CharUnits DirectAlign; if (IsIndirect) { DirectTy = llvm::PointerType::getUnqual(DirectTy); UnpaddedSize = DirectAlign = CharUnits::fromQuantity(8); } else { if (AI.getCoerceToType()) ArgTy = AI.getCoerceToType(); InFPRs = (!IsSoftFloatABI && (ArgTy->isFloatTy() || ArgTy->isDoubleTy())); IsVector = ArgTy->isVectorTy(); UnpaddedSize = TyInfo.first; DirectAlign = TyInfo.second; } CharUnits PaddedSize = CharUnits::fromQuantity(8); if (IsVector && UnpaddedSize > PaddedSize) PaddedSize = CharUnits::fromQuantity(16); assert((UnpaddedSize <= PaddedSize) && "Invalid argument size."); CharUnits Padding = (PaddedSize - UnpaddedSize); llvm::Type *IndexTy = CGF.Int64Ty; llvm::Value *PaddedSizeV = llvm::ConstantInt::get(IndexTy, PaddedSize.getQuantity()); if (IsVector) { // Work out the address of a vector argument on the stack. // Vector arguments are always passed in the high bits of a // single (8 byte) or double (16 byte) stack slot. Address OverflowArgAreaPtr = CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_ptr"); Address OverflowArgArea = Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"), TyInfo.second); Address MemAddr = CGF.Builder.CreateElementBitCast(OverflowArgArea, DirectTy, "mem_addr"); // Update overflow_arg_area_ptr pointer llvm::Value *NewOverflowArgArea = CGF.Builder.CreateGEP(OverflowArgArea.getPointer(), PaddedSizeV, "overflow_arg_area"); CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr); return MemAddr; } assert(PaddedSize.getQuantity() == 8); unsigned MaxRegs, RegCountField, RegSaveIndex; CharUnits RegPadding; if (InFPRs) { MaxRegs = 4; // Maximum of 4 FPR arguments RegCountField = 1; // __fpr RegSaveIndex = 16; // save offset for f0 RegPadding = CharUnits(); // floats are passed in the high bits of an FPR } else { MaxRegs = 5; // Maximum of 5 GPR arguments RegCountField = 0; // __gpr RegSaveIndex = 2; // save offset for r2 RegPadding = Padding; // values are passed in the low bits of a GPR } Address RegCountPtr = CGF.Builder.CreateStructGEP(VAListAddr, RegCountField, "reg_count_ptr"); llvm::Value *RegCount = CGF.Builder.CreateLoad(RegCountPtr, "reg_count"); llvm::Value *MaxRegsV = llvm::ConstantInt::get(IndexTy, MaxRegs); llvm::Value *InRegs = CGF.Builder.CreateICmpULT(RegCount, MaxRegsV, "fits_in_regs"); llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem"); llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock); // Emit code to load the value if it was passed in registers. CGF.EmitBlock(InRegBlock); // Work out the address of an argument register. 
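  // Worked example (for exposition): for the third 8-byte GPR argument,
  // reg_count is 2, so reg_offset = 2 * 8 + 2 * 8 = 32, which is the save
  // slot of r4 within __reg_save_area (r2 is saved at offset 16, r3 at 24).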
llvm::Value *ScaledRegCount = CGF.Builder.CreateMul(RegCount, PaddedSizeV, "scaled_reg_count"); llvm::Value *RegBase = llvm::ConstantInt::get(IndexTy, RegSaveIndex * PaddedSize.getQuantity() + RegPadding.getQuantity()); llvm::Value *RegOffset = CGF.Builder.CreateAdd(ScaledRegCount, RegBase, "reg_offset"); Address RegSaveAreaPtr = CGF.Builder.CreateStructGEP(VAListAddr, 3, "reg_save_area_ptr"); llvm::Value *RegSaveArea = CGF.Builder.CreateLoad(RegSaveAreaPtr, "reg_save_area"); Address RawRegAddr(CGF.Builder.CreateGEP(RegSaveArea, RegOffset, "raw_reg_addr"), PaddedSize); Address RegAddr = CGF.Builder.CreateElementBitCast(RawRegAddr, DirectTy, "reg_addr"); // Update the register count llvm::Value *One = llvm::ConstantInt::get(IndexTy, 1); llvm::Value *NewRegCount = CGF.Builder.CreateAdd(RegCount, One, "reg_count"); CGF.Builder.CreateStore(NewRegCount, RegCountPtr); CGF.EmitBranch(ContBlock); // Emit code to load the value if it was passed in memory. CGF.EmitBlock(InMemBlock); // Work out the address of a stack argument. Address OverflowArgAreaPtr = CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_ptr"); Address OverflowArgArea = Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"), PaddedSize); Address RawMemAddr = CGF.Builder.CreateConstByteGEP(OverflowArgArea, Padding, "raw_mem_addr"); Address MemAddr = CGF.Builder.CreateElementBitCast(RawMemAddr, DirectTy, "mem_addr"); // Update overflow_arg_area_ptr pointer llvm::Value *NewOverflowArgArea = CGF.Builder.CreateGEP(OverflowArgArea.getPointer(), PaddedSizeV, "overflow_arg_area"); CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr); CGF.EmitBranch(ContBlock); // Return the appropriate result. CGF.EmitBlock(ContBlock); Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock, "va_arg.addr"); if (IsIndirect) ResAddr = Address(CGF.Builder.CreateLoad(ResAddr, "indirect_arg"), TyInfo.second); return ResAddr; } ABIArgInfo SystemZABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); if (isVectorArgumentType(RetTy)) return ABIArgInfo::getDirect(); if (isCompoundType(RetTy) || getContext().getTypeSize(RetTy) > 64) return getNaturalAlignIndirect(RetTy); return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) : ABIArgInfo::getDirect()); } ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const { // Handle the generic C++ ABI. if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); // Integers and enums are extended to full register width. if (isPromotableIntegerTypeForABI(Ty)) return ABIArgInfo::getExtend(Ty); // Handle vector types and vector-like structure types. Note that // as opposed to float-like structure types, we do not allow any // padding for vector-like structures, so verify the sizes match. uint64_t Size = getContext().getTypeSize(Ty); QualType SingleElementTy = GetSingleElementType(Ty); if (isVectorArgumentType(SingleElementTy) && getContext().getTypeSize(SingleElementTy) == Size) return ABIArgInfo::getDirect(CGT.ConvertType(SingleElementTy)); // Values that are not 1, 2, 4 or 8 bytes in size are passed indirectly. if (Size != 8 && Size != 16 && Size != 32 && Size != 64) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); // Handle small structures. if (const RecordType *RT = Ty->getAs()) { // Structures with flexible arrays have variable length, so really // fail the size test above. 
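    // Illustrative examples (hypothetical types, for exposition):
    // struct { int a, b; } is 8 bytes with no single FP element, so it is
    // passed directly as i64 below, while struct { float f; } reduces to
    // 'float' via GetSingleElementType() and is passed in an FPR.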
const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); // The structure is passed as an unextended integer, a float, or a double. llvm::Type *PassTy; if (isFPArgumentType(SingleElementTy)) { assert(Size == 32 || Size == 64); if (Size == 32) PassTy = llvm::Type::getFloatTy(getVMContext()); else PassTy = llvm::Type::getDoubleTy(getVMContext()); } else PassTy = llvm::IntegerType::get(getVMContext(), Size); return ABIArgInfo::getDirect(PassTy); } // Non-structure compounds are passed indirectly. if (isCompoundType(Ty)) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); return ABIArgInfo::getDirect(nullptr); } //===----------------------------------------------------------------------===// // MSP430 ABI Implementation //===----------------------------------------------------------------------===// namespace { class MSP430ABIInfo : public DefaultABIInfo { static ABIArgInfo complexArgInfo() { ABIArgInfo Info = ABIArgInfo::getDirect(); Info.setCanBeFlattened(false); return Info; } public: MSP430ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} ABIArgInfo classifyReturnType(QualType RetTy) const { if (RetTy->isAnyComplexType()) return complexArgInfo(); return DefaultABIInfo::classifyReturnType(RetTy); } ABIArgInfo classifyArgumentType(QualType RetTy) const { if (RetTy->isAnyComplexType()) return complexArgInfo(); return DefaultABIInfo::classifyArgumentType(RetTy); } // Just copy the original implementations because // DefaultABIInfo::classify{Return,Argument}Type() are not virtual void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type); } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override { return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty)); } }; class MSP430TargetCodeGenInfo : public TargetCodeGenInfo { public: MSP430TargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(std::make_unique(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; }; } void MSP430TargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { if (GV->isDeclaration()) return; if (const FunctionDecl *FD = dyn_cast_or_null(D)) { const auto *InterruptAttr = FD->getAttr(); if (!InterruptAttr) return; // Handle 'interrupt' attribute: llvm::Function *F = cast(GV); // Step 1: Set ISR calling convention. F->setCallingConv(llvm::CallingConv::MSP430_INTR); // Step 2: Add attributes goodness. F->addFnAttr(llvm::Attribute::NoInline); F->addFnAttr("interrupt", llvm::utostr(InterruptAttr->getNumber())); } } //===----------------------------------------------------------------------===// // MIPS ABI Implementation. This works for both little-endian and // big-endian variants. 
//===----------------------------------------------------------------------===// namespace { class MipsABIInfo : public ABIInfo { bool IsO32; unsigned MinABIStackAlignInBytes, StackAlignInBytes; void CoerceToIntArgs(uint64_t TySize, SmallVectorImpl &ArgList) const; llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const; llvm::Type* returnAggregateInRegs(QualType RetTy, uint64_t Size) const; llvm::Type* getPaddingType(uint64_t Align, uint64_t Offset) const; public: MipsABIInfo(CodeGenTypes &CGT, bool _IsO32) : ABIInfo(CGT), IsO32(_IsO32), MinABIStackAlignInBytes(IsO32 ? 4 : 8), StackAlignInBytes(IsO32 ? 8 : 16) {} ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy, uint64_t &Offset) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; ABIArgInfo extendType(QualType Ty) const; }; class MIPSTargetCodeGenInfo : public TargetCodeGenInfo { unsigned SizeOfUnwindException; public: MIPSTargetCodeGenInfo(CodeGenTypes &CGT, bool IsO32) : TargetCodeGenInfo(std::make_unique(CGT, IsO32)), SizeOfUnwindException(IsO32 ? 24 : 32) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 29; } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; llvm::Function *Fn = cast(GV); if (FD->hasAttr()) Fn->addFnAttr("long-call"); else if (FD->hasAttr()) Fn->addFnAttr("short-call"); // Other attributes do not have a meaning for declarations. if (GV->isDeclaration()) return; if (FD->hasAttr()) { Fn->addFnAttr("mips16"); } else if (FD->hasAttr()) { Fn->addFnAttr("nomips16"); } if (FD->hasAttr()) Fn->addFnAttr("micromips"); else if (FD->hasAttr()) Fn->addFnAttr("nomicromips"); const MipsInterruptAttr *Attr = FD->getAttr(); if (!Attr) return; const char *Kind; switch (Attr->getInterrupt()) { case MipsInterruptAttr::eic: Kind = "eic"; break; case MipsInterruptAttr::sw0: Kind = "sw0"; break; case MipsInterruptAttr::sw1: Kind = "sw1"; break; case MipsInterruptAttr::hw0: Kind = "hw0"; break; case MipsInterruptAttr::hw1: Kind = "hw1"; break; case MipsInterruptAttr::hw2: Kind = "hw2"; break; case MipsInterruptAttr::hw3: Kind = "hw3"; break; case MipsInterruptAttr::hw4: Kind = "hw4"; break; case MipsInterruptAttr::hw5: Kind = "hw5"; break; } Fn->addFnAttr("interrupt", Kind); } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; unsigned getSizeOfUnwindException() const override { return SizeOfUnwindException; } }; } void MipsABIInfo::CoerceToIntArgs( uint64_t TySize, SmallVectorImpl &ArgList) const { llvm::IntegerType *IntTy = llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8); // Add (TySize / MinABIStackAlignInBytes) args of IntTy. for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N) ArgList.push_back(IntTy); // If necessary, add one more integer type to ArgList. unsigned R = TySize % (MinABIStackAlignInBytes * 8); if (R) ArgList.push_back(llvm::IntegerType::get(getVMContext(), R)); } // In N32/64, an aligned double precision floating point field is passed in // a register. 
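// Illustrative example (hypothetical type, N32/N64 only): for
//   struct S { int i; double d; };
// the double sits at an aligned 64-bit offset, so the struct is coerced to
// { i64, double } and 'd' travels in a floating-point register; a double at
// an unaligned (e.g. packed) offset would instead be passed in integer words.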
llvm::Type* MipsABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const { SmallVector ArgList, IntArgList; if (IsO32) { CoerceToIntArgs(TySize, ArgList); return llvm::StructType::get(getVMContext(), ArgList); } if (Ty->isComplexType()) return CGT.ConvertType(Ty); const RecordType *RT = Ty->getAs(); // Unions/vectors are passed in integer registers. if (!RT || !RT->isStructureOrClassType()) { CoerceToIntArgs(TySize, ArgList); return llvm::StructType::get(getVMContext(), ArgList); } const RecordDecl *RD = RT->getDecl(); const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); assert(!(TySize % 8) && "Size of structure must be multiple of 8."); uint64_t LastOffset = 0; unsigned idx = 0; llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64); // Iterate over fields in the struct/class and check if there are any aligned // double fields. for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { const QualType Ty = i->getType(); const BuiltinType *BT = Ty->getAs(); if (!BT || BT->getKind() != BuiltinType::Double) continue; uint64_t Offset = Layout.getFieldOffset(idx); if (Offset % 64) // Ignore doubles that are not aligned. continue; // Add ((Offset - LastOffset) / 64) args of type i64. for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j) ArgList.push_back(I64); // Add double type. ArgList.push_back(llvm::Type::getDoubleTy(getVMContext())); LastOffset = Offset + 64; } CoerceToIntArgs(TySize - LastOffset, IntArgList); ArgList.append(IntArgList.begin(), IntArgList.end()); return llvm::StructType::get(getVMContext(), ArgList); } llvm::Type *MipsABIInfo::getPaddingType(uint64_t OrigOffset, uint64_t Offset) const { if (OrigOffset + MinABIStackAlignInBytes > Offset) return nullptr; return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8); } ABIArgInfo MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { Ty = useFirstFieldIfTransparentUnion(Ty); uint64_t OrigOffset = Offset; uint64_t TySize = getContext().getTypeSize(Ty); uint64_t Align = getContext().getTypeAlign(Ty) / 8; Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes), (uint64_t)StackAlignInBytes); unsigned CurrOffset = llvm::alignTo(Offset, Align); Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8; if (isAggregateTypeForABI(Ty) || Ty->isVectorType()) { // Ignore empty aggregates. if (TySize == 0) return ABIArgInfo::getIgnore(); if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { Offset = OrigOffset + MinABIStackAlignInBytes; return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); } // If we have reached here, aggregates are passed directly by coercing to // another structure type. Padding is inserted if the offset of the // aggregate is unaligned. ABIArgInfo ArgInfo = ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0, getPaddingType(OrigOffset, CurrOffset)); ArgInfo.setInReg(true); return ArgInfo; } // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); // Make sure we pass indirectly things that are too large. if (const auto *EIT = Ty->getAs()) if (EIT->getNumBits() > 128 || (EIT->getNumBits() > 64 && !getContext().getTargetInfo().hasInt128Type())) return getNaturalAlignIndirect(Ty); // All integral types are promoted to the GPR width. if (Ty->isIntegralOrEnumerationType()) return extendType(Ty); return ABIArgInfo::getDirect( nullptr, 0, IsO32 ? 
nullptr : getPaddingType(OrigOffset, CurrOffset)); } llvm::Type* MipsABIInfo::returnAggregateInRegs(QualType RetTy, uint64_t Size) const { const RecordType *RT = RetTy->getAs(); SmallVector RTList; if (RT && RT->isStructureOrClassType()) { const RecordDecl *RD = RT->getDecl(); const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); unsigned FieldCnt = Layout.getFieldCount(); // N32/64 returns struct/classes in floating point registers if the // following conditions are met: // 1. The size of the struct/class is no larger than 128-bit. // 2. The struct/class has one or two fields all of which are floating // point types. // 3. The offset of the first field is zero (this follows what gcc does). // // Any other composite results are returned in integer registers. // if (FieldCnt && (FieldCnt <= 2) && !Layout.getFieldOffset(0)) { RecordDecl::field_iterator b = RD->field_begin(), e = RD->field_end(); for (; b != e; ++b) { const BuiltinType *BT = b->getType()->getAs(); if (!BT || !BT->isFloatingPoint()) break; RTList.push_back(CGT.ConvertType(b->getType())); } if (b == e) return llvm::StructType::get(getVMContext(), RTList, RD->hasAttr()); RTList.clear(); } } CoerceToIntArgs(Size, RTList); return llvm::StructType::get(getVMContext(), RTList); } ABIArgInfo MipsABIInfo::classifyReturnType(QualType RetTy) const { uint64_t Size = getContext().getTypeSize(RetTy); if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); // O32 doesn't treat zero-sized structs differently from other structs. // However, N32/N64 ignores zero sized return values. if (!IsO32 && Size == 0) return ABIArgInfo::getIgnore(); if (isAggregateTypeForABI(RetTy) || RetTy->isVectorType()) { if (Size <= 128) { if (RetTy->isAnyComplexType()) return ABIArgInfo::getDirect(); // O32 returns integer vectors in registers and N32/N64 returns all small // aggregates in registers. if (!IsO32 || (RetTy->isVectorType() && !RetTy->hasFloatingRepresentation())) { ABIArgInfo ArgInfo = ABIArgInfo::getDirect(returnAggregateInRegs(RetTy, Size)); ArgInfo.setInReg(true); return ArgInfo; } } return getNaturalAlignIndirect(RetTy); } // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); // Make sure we pass indirectly things that are too large. if (const auto *EIT = RetTy->getAs()) if (EIT->getNumBits() > 128 || (EIT->getNumBits() > 64 && !getContext().getTargetInfo().hasInt128Type())) return getNaturalAlignIndirect(RetTy); if (isPromotableIntegerTypeForABI(RetTy)) return ABIArgInfo::getExtend(RetTy); if ((RetTy->isUnsignedIntegerOrEnumerationType() || RetTy->isSignedIntegerOrEnumerationType()) && Size == 32 && !IsO32) return ABIArgInfo::getSignExtend(RetTy); return ABIArgInfo::getDirect(); } void MipsABIInfo::computeInfo(CGFunctionInfo &FI) const { ABIArgInfo &RetInfo = FI.getReturnInfo(); if (!getCXXABI().classifyReturnType(FI)) RetInfo = classifyReturnType(FI.getReturnType()); // Check if a pointer to an aggregate is passed as a hidden argument. uint64_t Offset = RetInfo.isIndirect() ? MinABIStackAlignInBytes : 0; for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type, Offset); } Address MipsABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType OrigTy) const { QualType Ty = OrigTy; // Integer arguments are promoted to 32-bit on O32 and 64-bit on N32/N64. // Pointers are also promoted in the same way but this only matters for N32. unsigned SlotSizeInBits = IsO32 ? 
                                                  32 : 64;
  unsigned PtrWidth = getTarget().getPointerWidth(0);
  bool DidPromote = false;
  if ((Ty->isIntegerType() && getContext().getIntWidth(Ty) < SlotSizeInBits) ||
      (Ty->isPointerType() && PtrWidth < SlotSizeInBits)) {
    DidPromote = true;
    Ty = getContext().getIntTypeForBitwidth(SlotSizeInBits,
                                            Ty->isSignedIntegerType());
  }

  auto TyInfo = getContext().getTypeInfoInChars(Ty);

  // The alignment of things in the argument area is never larger than
  // StackAlignInBytes.
  TyInfo.second =
      std::min(TyInfo.second, CharUnits::fromQuantity(StackAlignInBytes));

  // MinABIStackAlignInBytes is the size of argument slots on the stack.
  CharUnits ArgSlotSize = CharUnits::fromQuantity(MinABIStackAlignInBytes);

  Address Addr = emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false,
                                  TyInfo, ArgSlotSize,
                                  /*AllowHigherAlign*/ true);

  // If there was a promotion, "unpromote" into a temporary.
  // TODO: can we just use a pointer into a subset of the original slot?
  if (DidPromote) {
    Address Temp = CGF.CreateMemTemp(OrigTy, "vaarg.promotion-temp");
    llvm::Value *Promoted = CGF.Builder.CreateLoad(Addr);

    // Truncate down to the right width.
    llvm::Type *IntTy = (OrigTy->isIntegerType() ? Temp.getElementType()
                                                 : CGF.IntPtrTy);
    llvm::Value *V = CGF.Builder.CreateTrunc(Promoted, IntTy);
    if (OrigTy->isPointerType())
      V = CGF.Builder.CreateIntToPtr(V, Temp.getElementType());

    CGF.Builder.CreateStore(V, Temp);
    Addr = Temp;
  }

  return Addr;
}

ABIArgInfo MipsABIInfo::extendType(QualType Ty) const {
  int TySize = getContext().getTypeSize(Ty);

  // MIPS64 ABI requires unsigned 32 bit integers to be sign extended.
  if (Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
    return ABIArgInfo::getSignExtend(Ty);

  return ABIArgInfo::getExtend(Ty);
}

bool
MIPSTargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                                               llvm::Value *Address) const {
  // This information comes from gcc's implementation, which seems to
  // be as canonical as it gets.

  // Everything on MIPS is 4 bytes. Double-precision FP registers
  // are aliased to pairs of single-precision FP registers.
  llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);

  // 0-31 are the general purpose registers, $0 - $31.
  // 32-63 are the floating-point registers, $f0 - $f31.
  // 64 and 65 are the multiply/divide registers, $hi and $lo.
  // 66 is the (notional, I think) register for signal-handler return.
  AssignToArrayRange(CGF.Builder, Address, Four8, 0, 65);

  // 67-74 are the floating-point status registers, $fcc0 - $fcc7.
  // They are one bit wide and ignored here.

  // 80-111 are the coprocessor 0 registers, $c0r0 - $c0r31.
  // (coprocessor 1 is the FP unit)
  // 112-143 are the coprocessor 2 registers, $c2r0 - $c2r31.
  // 144-175 are the coprocessor 3 registers, $c3r0 - $c3r31.
  // 176-181 are the DSP accumulator registers.
  AssignToArrayRange(CGF.Builder, Address, Four8, 80, 181);
  return false;
}

//===----------------------------------------------------------------------===//
// AVR ABI Implementation.
//===----------------------------------------------------------------------===//

namespace {
class AVRTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  AVRTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {}

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override {
    if (GV->isDeclaration())
      return;
    const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
    if (!FD) return;
    auto *Fn = cast<llvm::Function>(GV);

    if (FD->getAttr<AVRInterruptAttr>())
      Fn->addFnAttr("interrupt");

    if (FD->getAttr<AVRSignalAttr>())
      Fn->addFnAttr("signal");
  }
};
}

//===----------------------------------------------------------------------===//
// TCE ABI Implementation (see http://tce.cs.tut.fi). Uses mostly the defaults.
// Currently subclassed only to implement custom OpenCL C function attribute
// handling.
//===----------------------------------------------------------------------===//

namespace {

class TCETargetCodeGenInfo : public DefaultTargetCodeGenInfo {
public:
  TCETargetCodeGenInfo(CodeGenTypes &CGT)
      : DefaultTargetCodeGenInfo(CGT) {}

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
};

void TCETargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  if (GV->isDeclaration())
    return;
  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (!FD) return;

  llvm::Function *F = cast<llvm::Function>(GV);

  if (M.getLangOpts().OpenCL) {
    if (FD->hasAttr<OpenCLKernelAttr>()) {
      // OpenCL C Kernel functions are not subject to inlining
      F->addFnAttr(llvm::Attribute::NoInline);
      const ReqdWorkGroupSizeAttr *Attr = FD->getAttr<ReqdWorkGroupSizeAttr>();
      if (Attr) {
        // Convert the reqd_work_group_size() attributes to metadata.
        llvm::LLVMContext &Context = F->getContext();
        llvm::NamedMDNode *OpenCLMetadata =
            M.getModule().getOrInsertNamedMetadata(
                "opencl.kernel_wg_size_info");

        SmallVector<llvm::Metadata *, 5> Operands;
        Operands.push_back(llvm::ConstantAsMetadata::get(F));

        Operands.push_back(
            llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue(
                M.Int32Ty, llvm::APInt(32, Attr->getXDim()))));
        Operands.push_back(
            llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue(
                M.Int32Ty, llvm::APInt(32, Attr->getYDim()))));
        Operands.push_back(
            llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue(
                M.Int32Ty, llvm::APInt(32, Attr->getZDim()))));

        // Add a boolean constant operand for "required" (true) or "hint"
        // (false) for implementing the work_group_size_hint attr later.
        // Currently always true as the hint is not yet implemented.
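        // Resulting module-level metadata, roughly (illustrative shape;
        // @kernel_name and X/Y/Z are placeholders):
        //   !opencl.kernel_wg_size_info = !{!0}
        //   !0 = !{void ()* @kernel_name, i32 X, i32 Y, i32 Z, i1 true}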
Operands.push_back( llvm::ConstantAsMetadata::get(llvm::ConstantInt::getTrue(Context))); OpenCLMetadata->addOperand(llvm::MDNode::get(Context, Operands)); } } } } } //===----------------------------------------------------------------------===// // Hexagon ABI Implementation //===----------------------------------------------------------------------===// namespace { class HexagonABIInfo : public DefaultABIInfo { public: HexagonABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} private: ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy, unsigned *RegsLeft) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; Address EmitVAArgFromMemory(CodeGenFunction &CFG, Address VAListAddr, QualType Ty) const; Address EmitVAArgForHexagon(CodeGenFunction &CFG, Address VAListAddr, QualType Ty) const; Address EmitVAArgForHexagonLinux(CodeGenFunction &CFG, Address VAListAddr, QualType Ty) const; }; class HexagonTargetCodeGenInfo : public TargetCodeGenInfo { public: HexagonTargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(std::make_unique(CGT)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 29; } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &GCM) const override { if (GV->isDeclaration()) return; const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; } }; } // namespace void HexagonABIInfo::computeInfo(CGFunctionInfo &FI) const { unsigned RegsLeft = 6; if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type, &RegsLeft); } static bool HexagonAdjustRegsLeft(uint64_t Size, unsigned *RegsLeft) { assert(Size <= 64 && "Not expecting to pass arguments larger than 64 bits" " through registers"); if (*RegsLeft == 0) return false; if (Size <= 32) { (*RegsLeft)--; return true; } if (2 <= (*RegsLeft & (~1U))) { *RegsLeft = (*RegsLeft & (~1U)) - 2; return true; } // Next available register was r5 but candidate was greater than 32-bits so it // has to go on the stack. However we still consume r5 if (*RegsLeft == 1) *RegsLeft = 0; return false; } ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty, unsigned *RegsLeft) const { if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); uint64_t Size = getContext().getTypeSize(Ty); if (Size <= 64) HexagonAdjustRegsLeft(Size, RegsLeft); if (Size > 64 && Ty->isExtIntType()) return getNaturalAlignIndirect(Ty, /*ByVal=*/true); return isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) : ABIArgInfo::getDirect(); } if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); // Ignore empty records. if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); uint64_t Size = getContext().getTypeSize(Ty); unsigned Align = getContext().getTypeAlign(Ty); if (Size > 64) return getNaturalAlignIndirect(Ty, /*ByVal=*/true); if (HexagonAdjustRegsLeft(Size, RegsLeft)) Align = Size <= 32 ? 32 : 64; if (Size <= Align) { // Pass in the smallest viable integer type. 
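    // Illustrative example (for exposition): a 5-byte struct (Size == 40
    // bits) that still fits in a register pair gets Align == 64 above, so it
    // is rounded up to the next power of two below and passed directly as
    // i64.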
if (!llvm::isPowerOf2_64(Size)) Size = llvm::NextPowerOf2(Size); return ABIArgInfo::getDirect(llvm::Type::getIntNTy(getVMContext(), Size)); } return DefaultABIInfo::classifyArgumentType(Ty); } ABIArgInfo HexagonABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); const TargetInfo &T = CGT.getTarget(); uint64_t Size = getContext().getTypeSize(RetTy); if (RetTy->getAs()) { // HVX vectors are returned in vector registers or register pairs. if (T.hasFeature("hvx")) { assert(T.hasFeature("hvx-length64b") || T.hasFeature("hvx-length128b")); uint64_t VecSize = T.hasFeature("hvx-length64b") ? 64*8 : 128*8; if (Size == VecSize || Size == 2*VecSize) return ABIArgInfo::getDirectInReg(); } // Large vector types should be returned via memory. if (Size > 64) return getNaturalAlignIndirect(RetTy); } if (!isAggregateTypeForABI(RetTy)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); if (Size > 64 && RetTy->isExtIntType()) return getNaturalAlignIndirect(RetTy, /*ByVal=*/false); return isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) : ABIArgInfo::getDirect(); } if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); // Aggregates <= 8 bytes are returned in registers, other aggregates // are returned indirectly. if (Size <= 64) { // Return in the smallest viable integer type. if (!llvm::isPowerOf2_64(Size)) Size = llvm::NextPowerOf2(Size); return ABIArgInfo::getDirect(llvm::Type::getIntNTy(getVMContext(), Size)); } return getNaturalAlignIndirect(RetTy, /*ByVal=*/true); } Address HexagonABIInfo::EmitVAArgFromMemory(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { // Load the overflow area pointer. Address __overflow_area_pointer_p = CGF.Builder.CreateStructGEP(VAListAddr, 2, "__overflow_area_pointer_p"); llvm::Value *__overflow_area_pointer = CGF.Builder.CreateLoad( __overflow_area_pointer_p, "__overflow_area_pointer"); uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8; if (Align > 4) { // Alignment should be a power of 2. assert((Align & (Align - 1)) == 0 && "Alignment is not power of 2!"); // overflow_arg_area = (overflow_arg_area + align - 1) & -align; llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int64Ty, Align - 1); // Add offset to the current pointer to access the argument. __overflow_area_pointer = CGF.Builder.CreateGEP(__overflow_area_pointer, Offset); llvm::Value *AsInt = CGF.Builder.CreatePtrToInt(__overflow_area_pointer, CGF.Int32Ty); // Create a mask which should be "AND"ed // with (overflow_arg_area + align - 1) llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int32Ty, -(int)Align); __overflow_area_pointer = CGF.Builder.CreateIntToPtr( CGF.Builder.CreateAnd(AsInt, Mask), __overflow_area_pointer->getType(), "__overflow_area_pointer.align"); } // Get the type of the argument from memory and bitcast // overflow area pointer to the argument type. llvm::Type *PTy = CGF.ConvertTypeForMem(Ty); Address AddrTyped = CGF.Builder.CreateBitCast( Address(__overflow_area_pointer, CharUnits::fromQuantity(Align)), llvm::PointerType::getUnqual(PTy)); // Round up to the minimum stack alignment for varargs which is 4 bytes. 
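  // e.g. (for exposition) a 1- or 3-byte argument still advances
  // __overflow_area_pointer by 4 bytes, and a 6-byte argument by 8.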
uint64_t Offset = llvm::alignTo(CGF.getContext().getTypeSize(Ty) / 8, 4); __overflow_area_pointer = CGF.Builder.CreateGEP( __overflow_area_pointer, llvm::ConstantInt::get(CGF.Int32Ty, Offset), "__overflow_area_pointer.next"); CGF.Builder.CreateStore(__overflow_area_pointer, __overflow_area_pointer_p); return AddrTyped; } Address HexagonABIInfo::EmitVAArgForHexagon(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { // FIXME: Need to handle alignment llvm::Type *BP = CGF.Int8PtrTy; llvm::Type *BPP = CGF.Int8PtrPtrTy; CGBuilderTy &Builder = CGF.Builder; Address VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap"); llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur"); // Handle address alignment for type alignment > 32 bits uint64_t TyAlign = CGF.getContext().getTypeAlign(Ty) / 8; if (TyAlign > 4) { assert((TyAlign & (TyAlign - 1)) == 0 && "Alignment is not power of 2!"); llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int32Ty); AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt32(TyAlign - 1)); AddrAsInt = Builder.CreateAnd(AddrAsInt, Builder.getInt32(~(TyAlign - 1))); Addr = Builder.CreateIntToPtr(AddrAsInt, BP); } llvm::Type *PTy = llvm::PointerType::getUnqual(CGF.ConvertType(Ty)); Address AddrTyped = Builder.CreateBitCast( Address(Addr, CharUnits::fromQuantity(TyAlign)), PTy); uint64_t Offset = llvm::alignTo(CGF.getContext().getTypeSize(Ty) / 8, 4); llvm::Value *NextAddr = Builder.CreateGEP( Addr, llvm::ConstantInt::get(CGF.Int32Ty, Offset), "ap.next"); Builder.CreateStore(NextAddr, VAListAddrAsBPP); return AddrTyped; } Address HexagonABIInfo::EmitVAArgForHexagonLinux(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { int ArgSize = CGF.getContext().getTypeSize(Ty) / 8; if (ArgSize > 8) return EmitVAArgFromMemory(CGF, VAListAddr, Ty); // Here we have check if the argument is in register area or // in overflow area. // If the saved register area pointer + argsize rounded up to alignment > // saved register area end pointer, argument is in overflow area. unsigned RegsLeft = 6; Ty = CGF.getContext().getCanonicalType(Ty); (void)classifyArgumentType(Ty, &RegsLeft); llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg"); llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack"); llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); // Get rounded size of the argument.GCC does not allow vararg of // size < 4 bytes. We follow the same logic here. ArgSize = (CGF.getContext().getTypeSize(Ty) <= 32) ? 4 : 8; int ArgAlign = (CGF.getContext().getTypeSize(Ty) <= 32) ? 4 : 8; // Argument may be in saved register area CGF.EmitBlock(MaybeRegBlock); // Load the current saved register area pointer. Address __current_saved_reg_area_pointer_p = CGF.Builder.CreateStructGEP( VAListAddr, 0, "__current_saved_reg_area_pointer_p"); llvm::Value *__current_saved_reg_area_pointer = CGF.Builder.CreateLoad( __current_saved_reg_area_pointer_p, "__current_saved_reg_area_pointer"); // Load the saved register area end pointer. 
Address __saved_reg_area_end_pointer_p = CGF.Builder.CreateStructGEP( VAListAddr, 1, "__saved_reg_area_end_pointer_p"); llvm::Value *__saved_reg_area_end_pointer = CGF.Builder.CreateLoad( __saved_reg_area_end_pointer_p, "__saved_reg_area_end_pointer"); // If the size of argument is > 4 bytes, check if the stack // location is aligned to 8 bytes if (ArgAlign > 4) { llvm::Value *__current_saved_reg_area_pointer_int = CGF.Builder.CreatePtrToInt(__current_saved_reg_area_pointer, CGF.Int32Ty); __current_saved_reg_area_pointer_int = CGF.Builder.CreateAdd( __current_saved_reg_area_pointer_int, llvm::ConstantInt::get(CGF.Int32Ty, (ArgAlign - 1)), "align_current_saved_reg_area_pointer"); __current_saved_reg_area_pointer_int = CGF.Builder.CreateAnd(__current_saved_reg_area_pointer_int, llvm::ConstantInt::get(CGF.Int32Ty, -ArgAlign), "align_current_saved_reg_area_pointer"); __current_saved_reg_area_pointer = CGF.Builder.CreateIntToPtr(__current_saved_reg_area_pointer_int, __current_saved_reg_area_pointer->getType(), "align_current_saved_reg_area_pointer"); } llvm::Value *__new_saved_reg_area_pointer = CGF.Builder.CreateGEP(__current_saved_reg_area_pointer, llvm::ConstantInt::get(CGF.Int32Ty, ArgSize), "__new_saved_reg_area_pointer"); llvm::Value *UsingStack = 0; UsingStack = CGF.Builder.CreateICmpSGT(__new_saved_reg_area_pointer, __saved_reg_area_end_pointer); CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, InRegBlock); // Argument in saved register area // Implement the block where argument is in register saved area CGF.EmitBlock(InRegBlock); llvm::Type *PTy = CGF.ConvertType(Ty); llvm::Value *__saved_reg_area_p = CGF.Builder.CreateBitCast( __current_saved_reg_area_pointer, llvm::PointerType::getUnqual(PTy)); CGF.Builder.CreateStore(__new_saved_reg_area_pointer, __current_saved_reg_area_pointer_p); CGF.EmitBranch(ContBlock); // Argument in overflow area // Implement the block where the argument is in overflow area. CGF.EmitBlock(OnStackBlock); // Load the overflow area pointer Address __overflow_area_pointer_p = CGF.Builder.CreateStructGEP(VAListAddr, 2, "__overflow_area_pointer_p"); llvm::Value *__overflow_area_pointer = CGF.Builder.CreateLoad( __overflow_area_pointer_p, "__overflow_area_pointer"); // Align the overflow area pointer according to the alignment of the argument if (ArgAlign > 4) { llvm::Value *__overflow_area_pointer_int = CGF.Builder.CreatePtrToInt(__overflow_area_pointer, CGF.Int32Ty); __overflow_area_pointer_int = CGF.Builder.CreateAdd(__overflow_area_pointer_int, llvm::ConstantInt::get(CGF.Int32Ty, ArgAlign - 1), "align_overflow_area_pointer"); __overflow_area_pointer_int = CGF.Builder.CreateAnd(__overflow_area_pointer_int, llvm::ConstantInt::get(CGF.Int32Ty, -ArgAlign), "align_overflow_area_pointer"); __overflow_area_pointer = CGF.Builder.CreateIntToPtr( __overflow_area_pointer_int, __overflow_area_pointer->getType(), "align_overflow_area_pointer"); } // Get the pointer for next argument in overflow area and store it // to overflow area pointer. llvm::Value *__new_overflow_area_pointer = CGF.Builder.CreateGEP( __overflow_area_pointer, llvm::ConstantInt::get(CGF.Int32Ty, ArgSize), "__overflow_area_pointer.next"); CGF.Builder.CreateStore(__new_overflow_area_pointer, __overflow_area_pointer_p); CGF.Builder.CreateStore(__new_overflow_area_pointer, __current_saved_reg_area_pointer_p); // Bitcast the overflow area pointer to the type of argument. 
llvm::Type *OverflowPTy = CGF.ConvertTypeForMem(Ty); llvm::Value *__overflow_area_p = CGF.Builder.CreateBitCast( __overflow_area_pointer, llvm::PointerType::getUnqual(OverflowPTy)); CGF.EmitBranch(ContBlock); // Get the correct pointer to load the variable argument // Implement the ContBlock CGF.EmitBlock(ContBlock); llvm::Type *MemPTy = llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty)); llvm::PHINode *ArgAddr = CGF.Builder.CreatePHI(MemPTy, 2, "vaarg.addr"); ArgAddr->addIncoming(__saved_reg_area_p, InRegBlock); ArgAddr->addIncoming(__overflow_area_p, OnStackBlock); return Address(ArgAddr, CharUnits::fromQuantity(ArgAlign)); } Address HexagonABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { if (getTarget().getTriple().isMusl()) return EmitVAArgForHexagonLinux(CGF, VAListAddr, Ty); return EmitVAArgForHexagon(CGF, VAListAddr, Ty); } //===----------------------------------------------------------------------===// // Lanai ABI Implementation //===----------------------------------------------------------------------===// namespace { class LanaiABIInfo : public DefaultABIInfo { public: LanaiABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} bool shouldUseInReg(QualType Ty, CCState &State) const; void computeInfo(CGFunctionInfo &FI) const override { CCState State(FI); // Lanai uses 4 registers to pass arguments unless the function has the // regparm attribute set. if (FI.getHasRegParm()) { State.FreeRegs = FI.getRegParm(); } else { State.FreeRegs = 4; } if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type, State); } ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const; ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; }; } // end anonymous namespace bool LanaiABIInfo::shouldUseInReg(QualType Ty, CCState &State) const { unsigned Size = getContext().getTypeSize(Ty); unsigned SizeInRegs = llvm::alignTo(Size, 32U) / 32U; if (SizeInRegs == 0) return false; if (SizeInRegs > State.FreeRegs) { State.FreeRegs = 0; return false; } State.FreeRegs -= SizeInRegs; return true; } ABIArgInfo LanaiABIInfo::getIndirectResult(QualType Ty, bool ByVal, CCState &State) const { if (!ByVal) { if (State.FreeRegs) { --State.FreeRegs; // Non-byval indirects just use one pointer. return getNaturalAlignIndirectInReg(Ty); } return getNaturalAlignIndirect(Ty, false); } // Compute the byval alignment. const unsigned MinABIStackAlignInBytes = 4; unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true, /*Realign=*/TypeAlign > MinABIStackAlignInBytes); } ABIArgInfo LanaiABIInfo::classifyArgumentType(QualType Ty, CCState &State) const { // Check with the C++ ABI first. const RecordType *RT = Ty->getAs(); if (RT) { CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); if (RAA == CGCXXABI::RAA_Indirect) { return getIndirectResult(Ty, /*ByVal=*/false, State); } else if (RAA == CGCXXABI::RAA_DirectInMemory) { return getNaturalAlignIndirect(Ty, /*ByVal=*/true); } } if (isAggregateTypeForABI(Ty)) { // Structures with flexible arrays are always indirect. if (RT && RT->getDecl()->hasFlexibleArrayMember()) return getIndirectResult(Ty, /*ByVal=*/true, State); // Ignore empty structs/unions. 
if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); llvm::LLVMContext &LLVMContext = getVMContext(); unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; if (SizeInRegs <= State.FreeRegs) { llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); SmallVector Elements(SizeInRegs, Int32); llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); State.FreeRegs -= SizeInRegs; return ABIArgInfo::getDirectInReg(Result); } else { State.FreeRegs = 0; } return getIndirectResult(Ty, true, State); } // Treat an enum type as its underlying type. if (const auto *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); bool InReg = shouldUseInReg(Ty, State); // Don't pass >64 bit integers in registers. if (const auto *EIT = Ty->getAs()) if (EIT->getNumBits() > 64) return getIndirectResult(Ty, /*ByVal=*/true, State); if (isPromotableIntegerTypeForABI(Ty)) { if (InReg) return ABIArgInfo::getDirectInReg(); return ABIArgInfo::getExtend(Ty); } if (InReg) return ABIArgInfo::getDirectInReg(); return ABIArgInfo::getDirect(); } namespace { class LanaiTargetCodeGenInfo : public TargetCodeGenInfo { public: LanaiTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(std::make_unique(CGT)) {} }; } //===----------------------------------------------------------------------===// // AMDGPU ABI Implementation //===----------------------------------------------------------------------===// namespace { class AMDGPUABIInfo final : public DefaultABIInfo { private: static const unsigned MaxNumRegsForArgsRet = 16; unsigned numRegsForType(QualType Ty) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Base, uint64_t Members) const override; // Coerce HIP pointer arguments from generic pointers to global ones. llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS, unsigned ToAS) const { // Structure types. if (auto STy = dyn_cast(Ty)) { SmallVector EltTys; bool Changed = false; for (auto T : STy->elements()) { auto NT = coerceKernelArgumentType(T, FromAS, ToAS); EltTys.push_back(NT); Changed |= (NT != T); } // Skip if there is no change in element types. if (!Changed) return STy; if (STy->hasName()) return llvm::StructType::create( EltTys, (STy->getName() + ".coerce").str(), STy->isPacked()); return llvm::StructType::get(getVMContext(), EltTys, STy->isPacked()); } // Array types. if (auto ATy = dyn_cast(Ty)) { auto T = ATy->getElementType(); auto NT = coerceKernelArgumentType(T, FromAS, ToAS); // Skip if there is no change in that element type. if (NT == T) return ATy; return llvm::ArrayType::get(NT, ATy->getNumElements()); } // Single value types. 
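    // Illustrative example (hypothetical type, HIP): for a kernel parameter
    // of type struct S { int *p; float f; }, the recursion above reaches the
    // pointer member here and rewrites { i32*, float } into
    // { i32 addrspace(1)*, float } (the struct type gets a ".coerce" suffix
    // when it is named).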
if (Ty->isPointerTy() && Ty->getPointerAddressSpace() == FromAS) return llvm::PointerType::get( cast(Ty)->getElementType(), ToAS); return Ty; } public: explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyKernelArgumentType(QualType Ty) const; ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { return true; } bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough( const Type *Base, uint64_t Members) const { uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32; // Homogeneous Aggregates may occupy at most 16 registers. return Members * NumRegs <= MaxNumRegsForArgsRet; } /// Estimate number of registers the type will use when passed in registers. unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const { unsigned NumRegs = 0; if (const VectorType *VT = Ty->getAs()) { // Compute from the number of elements. The reported size is based on the // in-memory size, which includes the padding 4th element for 3-vectors. QualType EltTy = VT->getElementType(); unsigned EltSize = getContext().getTypeSize(EltTy); // 16-bit element vectors should be passed as packed. if (EltSize == 16) return (VT->getNumElements() + 1) / 2; unsigned EltNumRegs = (EltSize + 31) / 32; return EltNumRegs * VT->getNumElements(); } if (const RecordType *RT = Ty->getAs()) { const RecordDecl *RD = RT->getDecl(); assert(!RD->hasFlexibleArrayMember()); for (const FieldDecl *Field : RD->fields()) { QualType FieldTy = Field->getType(); NumRegs += numRegsForType(FieldTy); } return NumRegs; } return (getContext().getTypeSize(Ty) + 31) / 32; } void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const { llvm::CallingConv::ID CC = FI.getCallingConvention(); if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); unsigned NumRegsLeft = MaxNumRegsForArgsRet; for (auto &Arg : FI.arguments()) { if (CC == llvm::CallingConv::AMDGPU_KERNEL) { Arg.info = classifyKernelArgumentType(Arg.type); } else { Arg.info = classifyArgumentType(Arg.type, NumRegsLeft); } } } Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { llvm_unreachable("AMDGPU does not support varargs"); } ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const { if (isAggregateTypeForABI(RetTy)) { // Records with non-trivial destructors/copy-constructors should not be // returned by value. if (!getRecordArgABI(RetTy, getCXXABI())) { // Ignore empty structs/unions. if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); // Lower single-element structs to just return a regular value. if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext())) return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); if (const RecordType *RT = RetTy->getAs()) { const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return DefaultABIInfo::classifyReturnType(RetTy); } // Pack aggregates <= 4 bytes into single VGPR or pair. 
uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 16) return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); if (Size <= 32) return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); if (Size <= 64) { llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); } if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet) return ABIArgInfo::getDirect(); } } // Otherwise just do the default thing. return DefaultABIInfo::classifyReturnType(RetTy); } /// For kernels all parameters are really passed in a special buffer. It doesn't /// make sense to pass anything byval, so everything must be direct. ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const { Ty = useFirstFieldIfTransparentUnion(Ty); // TODO: Can we omit empty structs? if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) Ty = QualType(SeltTy, 0); llvm::Type *OrigLTy = CGT.ConvertType(Ty); llvm::Type *LTy = OrigLTy; if (getContext().getLangOpts().HIP) { LTy = coerceKernelArgumentType( OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default), /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device)); } // FIXME: Should also use this for OpenCL, but it requires addressing the // problem of kernels being called. // // FIXME: This doesn't apply the optimization of coercing pointers in structs // to global address space when using byref. This would require implementing a // new kind of coercion of the in-memory type when for indirect arguments. if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy && isAggregateTypeForABI(Ty)) { return ABIArgInfo::getIndirectAliased( getContext().getTypeAlignInChars(Ty), getContext().getTargetAddressSpace(LangAS::opencl_constant), false /*Realign*/, nullptr /*Padding*/); } // If we set CanBeFlattened to true, CodeGen will expand the struct to its // individual elements, which confuses the Clover OpenCL backend; therefore we // have to set it to false here. Other args of getDirect() are just defaults. return ABIArgInfo::getDirect(LTy, 0, nullptr, false); } ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const { assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow"); Ty = useFirstFieldIfTransparentUnion(Ty); if (isAggregateTypeForABI(Ty)) { // Records with non-trivial destructors/copy-constructors should not be // passed by value. if (auto RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); // Ignore empty structs/unions. if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); // Lower single-element structs to just pass a regular value. TODO: We // could do reasonable-size multiple-element structs too, using getExpand(), // though watch out for things like bitfields. if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); if (const RecordType *RT = Ty->getAs()) { const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return DefaultABIInfo::classifyArgumentType(Ty); } // Pack aggregates <= 8 bytes into single VGPR or pair. 
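    // Illustrative examples (for exposition): a 3-byte struct is passed as
    // i32 (one VGPR), a 6-byte struct as [2 x i32] (a register pair), and
    // larger aggregates fall through to the register-count check below.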
uint64_t Size = getContext().getTypeSize(Ty); if (Size <= 64) { unsigned NumRegs = (Size + 31) / 32; NumRegsLeft -= std::min(NumRegsLeft, NumRegs); if (Size <= 16) return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); if (Size <= 32) return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); // XXX: Should this be i64 instead, and should the limit increase? llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); } if (NumRegsLeft > 0) { unsigned NumRegs = numRegsForType(Ty); if (NumRegsLeft >= NumRegs) { NumRegsLeft -= NumRegs; return ABIArgInfo::getDirect(); } } } // Otherwise just do the default thing. ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty); if (!ArgInfo.isIndirect()) { unsigned NumRegs = numRegsForType(Ty); NumRegsLeft -= std::min(NumRegs, NumRegsLeft); } return ArgInfo; } class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { public: AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(std::make_unique(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::PointerType *T, QualType QT) const override; LangAS getASTAllocaAddressSpace() const override { return getLangASFromTargetAS( getABIInfo().getDataLayout().getAllocaAddrSpace()); } LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const override; llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts, SyncScope Scope, llvm::AtomicOrdering Ordering, llvm::LLVMContext &Ctx) const override; llvm::Function * createEnqueuedBlockKernel(CodeGenFunction &CGF, llvm::Function *BlockInvokeFunc, llvm::Value *BlockLiteral) const override; bool shouldEmitStaticExternCAliases() const override; void setCUDAKernelCallingConvention(const FunctionType *&FT) const override; }; } static bool requiresAMDGPUProtectedVisibility(const Decl *D, llvm::GlobalValue *GV) { if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility) return false; return D->hasAttr() || (isa(D) && D->hasAttr()) || (isa(D) && (D->hasAttr() || D->hasAttr() || cast(D)->getType()->isCUDADeviceBuiltinSurfaceType() || cast(D)->getType()->isCUDADeviceBuiltinTextureType())); } void AMDGPUTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { if (requiresAMDGPUProtectedVisibility(D, GV)) { GV->setVisibility(llvm::GlobalValue::ProtectedVisibility); GV->setDSOLocal(true); } if (GV->isDeclaration()) return; const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; llvm::Function *F = cast(GV); const auto *ReqdWGS = M.getLangOpts().OpenCL ? 
FD->getAttr() : nullptr; const bool IsOpenCLKernel = M.getLangOpts().OpenCL && FD->hasAttr(); const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr(); if ((IsOpenCLKernel || IsHIPKernel) && (M.getTriple().getOS() == llvm::Triple::AMDHSA)) F->addFnAttr("amdgpu-implicitarg-num-bytes", "56"); if (IsHIPKernel) F->addFnAttr("uniform-work-group-size", "true"); const auto *FlatWGS = FD->getAttr(); if (ReqdWGS || FlatWGS) { unsigned Min = 0; unsigned Max = 0; if (FlatWGS) { Min = FlatWGS->getMin() ->EvaluateKnownConstInt(M.getContext()) .getExtValue(); Max = FlatWGS->getMax() ->EvaluateKnownConstInt(M.getContext()) .getExtValue(); } if (ReqdWGS && Min == 0 && Max == 0) Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim(); if (Min != 0) { assert(Min <= Max && "Min must be less than or equal Max"); std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max); F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); } else assert(Max == 0 && "Max must be zero"); } else if (IsOpenCLKernel || IsHIPKernel) { // By default, restrict the maximum size to a value specified by // --gpu-max-threads-per-block=n or its default value. std::string AttrVal = std::string("1,") + llvm::utostr(M.getLangOpts().GPUMaxThreadsPerBlock); F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); } if (const auto *Attr = FD->getAttr()) { unsigned Min = Attr->getMin()->EvaluateKnownConstInt(M.getContext()).getExtValue(); unsigned Max = Attr->getMax() ? Attr->getMax() ->EvaluateKnownConstInt(M.getContext()) .getExtValue() : 0; if (Min != 0) { assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max"); std::string AttrVal = llvm::utostr(Min); if (Max != 0) AttrVal = AttrVal + "," + llvm::utostr(Max); F->addFnAttr("amdgpu-waves-per-eu", AttrVal); } else assert(Max == 0 && "Max must be zero"); } if (const auto *Attr = FD->getAttr()) { unsigned NumSGPR = Attr->getNumSGPR(); if (NumSGPR != 0) F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR)); } if (const auto *Attr = FD->getAttr()) { uint32_t NumVGPR = Attr->getNumVGPR(); if (NumVGPR != 0) F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } } unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { return llvm::CallingConv::AMDGPU_KERNEL; } // Currently LLVM assumes null pointers always have value 0, // which results in incorrectly transformed IR. Therefore, instead of // emitting null pointers in private and local address spaces, a null // pointer in generic address space is emitted which is casted to a // pointer in local or private address space. 
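// Illustrative example (for exposition; address-space numbers follow the
// amdgcn data layout): a null '__local int *' constant is emitted as
//   addrspacecast (i32* null to i32 addrspace(3)*)
// rather than 'i32 addrspace(3)* null', because the all-zero bit pattern is
// a valid, non-null address in the local and private address spaces there.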
llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer( const CodeGen::CodeGenModule &CGM, llvm::PointerType *PT, QualType QT) const { if (CGM.getContext().getTargetNullPointerValue(QT) == 0) return llvm::ConstantPointerNull::get(PT); auto &Ctx = CGM.getContext(); auto NPT = llvm::PointerType::get(PT->getElementType(), Ctx.getTargetAddressSpace(LangAS::opencl_generic)); return llvm::ConstantExpr::getAddrSpaceCast( llvm::ConstantPointerNull::get(NPT), PT); } LangAS AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const { assert(!CGM.getLangOpts().OpenCL && !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && "Address space agnostic languages only"); LangAS DefaultGlobalAS = getLangASFromTargetAS( CGM.getContext().getTargetAddressSpace(LangAS::opencl_global)); if (!D) return DefaultGlobalAS; LangAS AddrSpace = D->getType().getAddressSpace(); assert(AddrSpace == LangAS::Default || isTargetAddressSpace(AddrSpace)); if (AddrSpace != LangAS::Default) return AddrSpace; if (CGM.isTypeConstant(D->getType(), false)) { if (auto ConstAS = CGM.getTarget().getConstantAddressSpace()) return ConstAS.getValue(); } return DefaultGlobalAS; } llvm::SyncScope::ID AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts, SyncScope Scope, llvm::AtomicOrdering Ordering, llvm::LLVMContext &Ctx) const { std::string Name; switch (Scope) { case SyncScope::OpenCLWorkGroup: Name = "workgroup"; break; case SyncScope::OpenCLDevice: Name = "agent"; break; case SyncScope::OpenCLAllSVMDevices: Name = ""; break; case SyncScope::OpenCLSubGroup: Name = "wavefront"; } if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) { if (!Name.empty()) Name = Twine(Twine(Name) + Twine("-")).str(); Name = Twine(Twine(Name) + Twine("one-as")).str(); } return Ctx.getOrInsertSyncScopeID(Name); } bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { return false; } void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention( const FunctionType *&FT) const { FT = getABIInfo().getContext().adjustFunctionType( FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel)); } //===----------------------------------------------------------------------===// // SPARC v8 ABI Implementation. // Based on the SPARC Compliance Definition version 2.4.1. // // Ensures that complex values are passed in registers. // namespace { class SparcV8ABIInfo : public DefaultABIInfo { public: SparcV8ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} private: ABIArgInfo classifyReturnType(QualType RetTy) const; void computeInfo(CGFunctionInfo &FI) const override; }; } // end anonymous namespace ABIArgInfo SparcV8ABIInfo::classifyReturnType(QualType Ty) const { if (Ty->isAnyComplexType()) { return ABIArgInfo::getDirect(); } else { return DefaultABIInfo::classifyReturnType(Ty); } } void SparcV8ABIInfo::computeInfo(CGFunctionInfo &FI) const { FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &Arg : FI.arguments()) Arg.info = classifyArgumentType(Arg.type); } namespace { class SparcV8TargetCodeGenInfo : public TargetCodeGenInfo { public: SparcV8TargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(std::make_unique(CGT)) {} }; } // end anonymous namespace //===----------------------------------------------------------------------===// // SPARC v9 ABI Implementation. // Based on the SPARC Compliance Definition version 2.4.1. // // Function arguments a mapped to a nominal "parameter array" and promoted to // registers depending on their type. 
Each argument occupies 8 or 16 bytes in // the array, structs larger than 16 bytes are passed indirectly. // // One case requires special care: // // struct mixed { // int i; // float f; // }; // // When a struct mixed is passed by value, it only occupies 8 bytes in the // parameter array, but the int is passed in an integer register, and the float // is passed in a floating point register. This is represented as two arguments // with the LLVM IR inreg attribute: // // declare void f(i32 inreg %i, float inreg %f) // // The code generator will only allocate 4 bytes from the parameter array for // the inreg arguments. All other arguments are allocated a multiple of 8 // bytes. // namespace { class SparcV9ABIInfo : public ABIInfo { public: SparcV9ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} private: ABIArgInfo classifyType(QualType RetTy, unsigned SizeLimit) const; void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; // Coercion type builder for structs passed in registers. The coercion type // serves two purposes: // // 1. Pad structs to a multiple of 64 bits, so they are passed 'left-aligned' // in registers. // 2. Expose aligned floating point elements as first-level elements, so the // code generator knows to pass them in floating point registers. // // We also compute the InReg flag which indicates that the struct contains // aligned 32-bit floats. // struct CoerceBuilder { llvm::LLVMContext &Context; const llvm::DataLayout &DL; SmallVector Elems; uint64_t Size; bool InReg; CoerceBuilder(llvm::LLVMContext &c, const llvm::DataLayout &dl) : Context(c), DL(dl), Size(0), InReg(false) {} // Pad Elems with integers until Size is ToSize. void pad(uint64_t ToSize) { assert(ToSize >= Size && "Cannot remove elements"); if (ToSize == Size) return; // Finish the current 64-bit word. uint64_t Aligned = llvm::alignTo(Size, 64); if (Aligned > Size && Aligned <= ToSize) { Elems.push_back(llvm::IntegerType::get(Context, Aligned - Size)); Size = Aligned; } // Add whole 64-bit words. while (Size + 64 <= ToSize) { Elems.push_back(llvm::Type::getInt64Ty(Context)); Size += 64; } // Final in-word padding. if (Size < ToSize) { Elems.push_back(llvm::IntegerType::get(Context, ToSize - Size)); Size = ToSize; } } // Add a floating point element at Offset. void addFloat(uint64_t Offset, llvm::Type *Ty, unsigned Bits) { // Unaligned floats are treated as integers. if (Offset % Bits) return; // The InReg flag is only required if there are any floats < 64 bits. if (Bits < 64) InReg = true; pad(Offset); Elems.push_back(Ty); Size = Offset + Bits; } // Add a struct type to the coercion type, starting at Offset (in bits). void addStruct(uint64_t Offset, llvm::StructType *StrTy) { const llvm::StructLayout *Layout = DL.getStructLayout(StrTy); for (unsigned i = 0, e = StrTy->getNumElements(); i != e; ++i) { llvm::Type *ElemTy = StrTy->getElementType(i); uint64_t ElemOffset = Offset + Layout->getElementOffsetInBits(i); switch (ElemTy->getTypeID()) { case llvm::Type::StructTyID: addStruct(ElemOffset, cast(ElemTy)); break; case llvm::Type::FloatTyID: addFloat(ElemOffset, ElemTy, 32); break; case llvm::Type::DoubleTyID: addFloat(ElemOffset, ElemTy, 64); break; case llvm::Type::FP128TyID: addFloat(ElemOffset, ElemTy, 128); break; case llvm::Type::PointerTyID: if (ElemOffset % 64 == 0) { pad(ElemOffset); Elems.push_back(ElemTy); Size += 64; } break; default: break; } } } // Check if Ty is a usable substitute for the coercion type. 
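// Walk-through of the builder above for the 'struct mixed { int i; float f; }'
// example from the comment at the start of this SPARC v9 section (assuming a
// 32-bit int; explanatory only): addStruct() skips the integer member because
// integer elements fall through to the 'default' case, and addFloat() for the
// float at bit offset 32 first pads with an i32 and then appends the float,
// leaving Elems == { i32, float } and InReg == true. isUsableType() below then
// detects that this matches the original IR struct type, so the original type
// is reused as the coercion type and the argument is passed as the two inreg
// values shown in the comment above.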
bool isUsableType(llvm::StructType *Ty) const { return llvm::makeArrayRef(Elems) == Ty->elements(); } // Get the coercion type as a literal struct type. llvm::Type *getType() const { if (Elems.size() == 1) return Elems.front(); else return llvm::StructType::get(Context, Elems); } }; }; } // end anonymous namespace ABIArgInfo SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const { if (Ty->isVoidType()) return ABIArgInfo::getIgnore(); uint64_t Size = getContext().getTypeSize(Ty); // Anything too big to fit in registers is passed with an explicit indirect // pointer / sret pointer. if (Size > SizeLimit) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); // Integer types smaller than a register are extended. if (Size < 64 && Ty->isIntegerType()) return ABIArgInfo::getExtend(Ty); if (const auto *EIT = Ty->getAs()) if (EIT->getNumBits() < 64) return ABIArgInfo::getExtend(Ty); // Other non-aggregates go in registers. if (!isAggregateTypeForABI(Ty)) return ABIArgInfo::getDirect(); // If a C++ object has either a non-trivial copy constructor or a non-trivial // destructor, it is passed with an explicit indirect pointer / sret pointer. if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); // This is a small aggregate type that should be passed in registers. // Build a coercion type from the LLVM struct type. llvm::StructType *StrTy = dyn_cast(CGT.ConvertType(Ty)); if (!StrTy) return ABIArgInfo::getDirect(); CoerceBuilder CB(getVMContext(), getDataLayout()); CB.addStruct(0, StrTy); CB.pad(llvm::alignTo(CB.DL.getTypeSizeInBits(StrTy), 64)); // Try to use the original type for coercion. llvm::Type *CoerceTy = CB.isUsableType(StrTy) ? StrTy : CB.getType(); if (CB.InReg) return ABIArgInfo::getDirectInReg(CoerceTy); else return ABIArgInfo::getDirect(CoerceTy); } Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { ABIArgInfo AI = classifyType(Ty, 16 * 8); llvm::Type *ArgTy = CGT.ConvertType(Ty); if (AI.canHaveCoerceToType() && !AI.getCoerceToType()) AI.setCoerceToType(ArgTy); CharUnits SlotSize = CharUnits::fromQuantity(8); CGBuilderTy &Builder = CGF.Builder; Address Addr(Builder.CreateLoad(VAListAddr, "ap.cur"), SlotSize); llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy); auto TypeInfo = getContext().getTypeInfoInChars(Ty); Address ArgAddr = Address::invalid(); CharUnits Stride; switch (AI.getKind()) { case ABIArgInfo::Expand: case ABIArgInfo::CoerceAndExpand: case ABIArgInfo::InAlloca: llvm_unreachable("Unsupported ABI kind for va_arg"); case ABIArgInfo::Extend: { Stride = SlotSize; CharUnits Offset = SlotSize - TypeInfo.first; ArgAddr = Builder.CreateConstInBoundsByteGEP(Addr, Offset, "extend"); break; } case ABIArgInfo::Direct: { auto AllocSize = getDataLayout().getTypeAllocSize(AI.getCoerceToType()); Stride = CharUnits::fromQuantity(AllocSize).alignTo(SlotSize); ArgAddr = Addr; break; } case ABIArgInfo::Indirect: case ABIArgInfo::IndirectAliased: Stride = SlotSize; ArgAddr = Builder.CreateElementBitCast(Addr, ArgPtrTy, "indirect"); ArgAddr = Address(Builder.CreateLoad(ArgAddr, "indirect.arg"), TypeInfo.second); break; case ABIArgInfo::Ignore: return Address(llvm::UndefValue::get(ArgPtrTy), TypeInfo.second); } // Update VAList. 
Address NextPtr = Builder.CreateConstInBoundsByteGEP(Addr, Stride, "ap.next"); Builder.CreateStore(NextPtr.getPointer(), VAListAddr); return Builder.CreateBitCast(ArgAddr, ArgPtrTy, "arg.addr"); } void SparcV9ABIInfo::computeInfo(CGFunctionInfo &FI) const { FI.getReturnInfo() = classifyType(FI.getReturnType(), 32 * 8); for (auto &I : FI.arguments()) I.info = classifyType(I.type, 16 * 8); } namespace { class SparcV9TargetCodeGenInfo : public TargetCodeGenInfo { public: SparcV9TargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(std::make_unique(CGT)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 14; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; } // end anonymous namespace bool SparcV9TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { // This is calculated from the LLVM and GCC tables and verified // against gcc output. AFAIK all ABIs use the same encoding. CodeGen::CGBuilderTy &Builder = CGF.Builder; llvm::IntegerType *i8 = CGF.Int8Ty; llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4); llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8); // 0-31: the 8-byte general-purpose registers AssignToArrayRange(Builder, Address, Eight8, 0, 31); // 32-63: f0-31, the 4-byte floating-point registers AssignToArrayRange(Builder, Address, Four8, 32, 63); // Y = 64 // PSR = 65 // WIM = 66 // TBR = 67 // PC = 68 // NPC = 69 // FSR = 70 // CSR = 71 AssignToArrayRange(Builder, Address, Eight8, 64, 71); // 72-87: d0-15, the 8-byte floating-point registers AssignToArrayRange(Builder, Address, Eight8, 72, 87); return false; } // ARC ABI implementation. namespace { class ARCABIInfo : public DefaultABIInfo { public: using DefaultABIInfo::DefaultABIInfo; private: Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; void updateState(const ABIArgInfo &Info, QualType Ty, CCState &State) const { if (!State.FreeRegs) return; if (Info.isIndirect() && Info.getInReg()) State.FreeRegs--; else if (Info.isDirect() && Info.getInReg()) { unsigned sz = (getContext().getTypeSize(Ty) + 31) / 32; if (sz < State.FreeRegs) State.FreeRegs -= sz; else State.FreeRegs = 0; } } void computeInfo(CGFunctionInfo &FI) const override { CCState State(FI); // ARC uses 8 registers to pass arguments. State.FreeRegs = 8; if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); updateState(FI.getReturnInfo(), FI.getReturnType(), State); for (auto &I : FI.arguments()) { I.info = classifyArgumentType(I.type, State.FreeRegs); updateState(I.info, I.type, State); } } ABIArgInfo getIndirectByRef(QualType Ty, bool HasFreeRegs) const; ABIArgInfo getIndirectByValue(QualType Ty) const; ABIArgInfo classifyArgumentType(QualType Ty, uint8_t FreeRegs) const; ABIArgInfo classifyReturnType(QualType RetTy) const; }; class ARCTargetCodeGenInfo : public TargetCodeGenInfo { public: ARCTargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(std::make_unique(CGT)) {} }; ABIArgInfo ARCABIInfo::getIndirectByRef(QualType Ty, bool HasFreeRegs) const { return HasFreeRegs ? getNaturalAlignIndirectInReg(Ty) : getNaturalAlignIndirect(Ty, false); } ABIArgInfo ARCABIInfo::getIndirectByValue(QualType Ty) const { // Compute the byval alignment. 
const unsigned MinABIStackAlignInBytes = 4; unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true, TypeAlign > MinABIStackAlignInBytes); } Address ARCABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, getContext().getTypeInfoInChars(Ty), CharUnits::fromQuantity(4), true); } ABIArgInfo ARCABIInfo::classifyArgumentType(QualType Ty, uint8_t FreeRegs) const { // Handle the generic C++ ABI. const RecordType *RT = Ty->getAs(); if (RT) { CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); if (RAA == CGCXXABI::RAA_Indirect) return getIndirectByRef(Ty, FreeRegs > 0); if (RAA == CGCXXABI::RAA_DirectInMemory) return getIndirectByValue(Ty); } // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); auto SizeInRegs = llvm::alignTo(getContext().getTypeSize(Ty), 32) / 32; if (isAggregateTypeForABI(Ty)) { // Structures with flexible arrays are always indirect. if (RT && RT->getDecl()->hasFlexibleArrayMember()) return getIndirectByValue(Ty); // Ignore empty structs/unions. if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); llvm::LLVMContext &LLVMContext = getVMContext(); llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); SmallVector Elements(SizeInRegs, Int32); llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); return FreeRegs >= SizeInRegs ? ABIArgInfo::getDirectInReg(Result) : ABIArgInfo::getDirect(Result, 0, nullptr, false); } if (const auto *EIT = Ty->getAs()) if (EIT->getNumBits() > 64) return getIndirectByValue(Ty); return isPromotableIntegerTypeForABI(Ty) ? (FreeRegs >= SizeInRegs ? ABIArgInfo::getExtendInReg(Ty) : ABIArgInfo::getExtend(Ty)) : (FreeRegs >= SizeInRegs ? ABIArgInfo::getDirectInReg() : ABIArgInfo::getDirect()); } ABIArgInfo ARCABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isAnyComplexType()) return ABIArgInfo::getDirectInReg(); // Arguments of size > 4 registers are indirect. auto RetSize = llvm::alignTo(getContext().getTypeSize(RetTy), 32) / 32; if (RetSize > 4) return getIndirectByRef(RetTy, /*HasFreeRegs*/ true); return DefaultABIInfo::classifyReturnType(RetTy); } } // End anonymous namespace. //===----------------------------------------------------------------------===// // XCore ABI Implementation //===----------------------------------------------------------------------===// namespace { /// A SmallStringEnc instance is used to build up the TypeString by passing /// it by reference between functions that append to it. typedef llvm::SmallString<128> SmallStringEnc; /// TypeStringCache caches the meta encodings of Types. /// /// The reason for caching TypeStrings is two fold: /// 1. To cache a type's encoding for later uses; /// 2. As a means to break recursive member type inclusion. /// /// A cache Entry can have a Status of: /// NonRecursive: The type encoding is not recursive; /// Recursive: The type encoding is recursive; /// Incomplete: An incomplete TypeString; /// IncompleteUsed: An incomplete TypeString that has been used in a /// Recursive type encoding. /// /// A NonRecursive entry will have all of its sub-members expanded as fully /// as possible. Whilst it may contain types which are recursive, the type /// itself is not recursive and thus its encoding may be safely used whenever /// the type is encountered. 
///
/// A Recursive entry will have all of its sub-members expanded as fully as
/// possible. The type itself is recursive and it may contain other types which
/// are recursive. The Recursive encoding must not be used during the expansion
/// of a recursive type's recursive branch. For simplicity the code uses
/// IncompleteCount to reject all usage of Recursive encodings for member types.
///
/// An Incomplete entry is always a RecordType and only encodes its
/// identifier e.g. "s(S){}". Incomplete 'StubEnc' entries are ephemeral and
/// are placed into the cache during type expansion as a means to identify and
/// handle recursive inclusion of types as sub-members. If there is recursion
/// the entry becomes IncompleteUsed.
///
/// During the expansion of a RecordType's members:
///
/// If the cache contains a NonRecursive encoding for the member type, the
/// cached encoding is used;
///
/// If the cache contains a Recursive encoding for the member type, the
/// cached encoding is 'Swapped' out, as it may be incorrect, and...
///
/// If the member is a RecordType, an Incomplete encoding is placed into the
/// cache to break potential recursive inclusion of itself as a sub-member;
///
/// Once a member RecordType has been expanded, its temporary incomplete
/// entry is removed from the cache. If a Recursive encoding was swapped out
/// it is swapped back in;
///
/// If an incomplete entry is used to expand a sub-member, the incomplete
/// entry is marked as IncompleteUsed. The cache keeps count of how many
/// IncompleteUsed entries it currently contains in IncompleteUsedCount;
///
/// If a member's encoding is found to be a NonRecursive or Recursive viz:
/// IncompleteUsedCount==0, the member's encoding is added to the cache.
/// Else the member is part of a recursive type and thus the recursion has
/// been exited too soon for the encoding to be correct for the member.
///
class TypeStringCache {
  enum Status {NonRecursive, Recursive, Incomplete, IncompleteUsed};
  struct Entry {
    std::string Str;     // The encoded TypeString for the type.
    enum Status State;   // Information about the encoding in 'Str'.
    std::string Swapped; // A temporary place holder for a Recursive encoding
                         // during the expansion of RecordType's members.
  };
  std::map<const IdentifierInfo *, struct Entry> Map;
  unsigned IncompleteCount;     // Number of Incomplete entries in the Map.
  unsigned IncompleteUsedCount; // Number of IncompleteUsed entries in the Map.
public:
  TypeStringCache() : IncompleteCount(0), IncompleteUsedCount(0) {}
  void addIncomplete(const IdentifierInfo *ID, std::string StubEnc);
  bool removeIncomplete(const IdentifierInfo *ID);
  void addIfComplete(const IdentifierInfo *ID, StringRef Str,
                     bool IsRecursive);
  StringRef lookupStr(const IdentifierInfo *ID);
};

/// TypeString encodings for enum & union fields must be ordered.
/// FieldEncoding is a helper for this ordering process.
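// Worked example (derived from appendEnumType() later in this file, shown for
// illustration only): for 'enum E { B = 1, A = 0 };' each enumerator is
// encoded as "m(<name>){<value>}" and the resulting FieldEncoding entries are
// sorted, so the final encoding is "e(E){m(A){0},m(B){1}}" regardless of the
// declaration order. Union fields get the same sorting in appendRecordType().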
class FieldEncoding { bool HasName; std::string Enc; public: FieldEncoding(bool b, SmallStringEnc &e) : HasName(b), Enc(e.c_str()) {} StringRef str() { return Enc; } bool operator<(const FieldEncoding &rhs) const { if (HasName != rhs.HasName) return HasName; return Enc < rhs.Enc; } }; class XCoreABIInfo : public DefaultABIInfo { public: XCoreABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; }; class XCoreTargetCodeGenInfo : public TargetCodeGenInfo { mutable TypeStringCache TSC; void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, const CodeGen::CodeGenModule &M) const; public: XCoreTargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(std::make_unique(CGT)) {} void emitTargetMetadata(CodeGen::CodeGenModule &CGM, const llvm::MapVector &MangledDeclNames) const override; }; } // End anonymous namespace. // TODO: this implementation is likely now redundant with the default // EmitVAArg. Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { CGBuilderTy &Builder = CGF.Builder; // Get the VAList. CharUnits SlotSize = CharUnits::fromQuantity(4); Address AP(Builder.CreateLoad(VAListAddr), SlotSize); // Handle the argument. ABIArgInfo AI = classifyArgumentType(Ty); CharUnits TypeAlign = getContext().getTypeAlignInChars(Ty); llvm::Type *ArgTy = CGT.ConvertType(Ty); if (AI.canHaveCoerceToType() && !AI.getCoerceToType()) AI.setCoerceToType(ArgTy); llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy); Address Val = Address::invalid(); CharUnits ArgSize = CharUnits::Zero(); switch (AI.getKind()) { case ABIArgInfo::Expand: case ABIArgInfo::CoerceAndExpand: case ABIArgInfo::InAlloca: llvm_unreachable("Unsupported ABI kind for va_arg"); case ABIArgInfo::Ignore: Val = Address(llvm::UndefValue::get(ArgPtrTy), TypeAlign); ArgSize = CharUnits::Zero(); break; case ABIArgInfo::Extend: case ABIArgInfo::Direct: Val = Builder.CreateBitCast(AP, ArgPtrTy); ArgSize = CharUnits::fromQuantity( getDataLayout().getTypeAllocSize(AI.getCoerceToType())); ArgSize = ArgSize.alignTo(SlotSize); break; case ABIArgInfo::Indirect: case ABIArgInfo::IndirectAliased: Val = Builder.CreateElementBitCast(AP, ArgPtrTy); Val = Address(Builder.CreateLoad(Val), TypeAlign); ArgSize = SlotSize; break; } // Increment the VAList. if (!ArgSize.isZero()) { Address APN = Builder.CreateConstInBoundsByteGEP(AP, ArgSize); Builder.CreateStore(APN.getPointer(), VAListAddr); } return Val; } /// During the expansion of a RecordType, an incomplete TypeString is placed /// into the cache as a means to identify and break recursion. /// If there is a Recursive encoding in the cache, it is swapped out and will /// be reinserted by removeIncomplete(). /// All other types of encoding should have been used rather than arriving here. void TypeStringCache::addIncomplete(const IdentifierInfo *ID, std::string StubEnc) { if (!ID) return; Entry &E = Map[ID]; assert( (E.Str.empty() || E.State == Recursive) && "Incorrectly use of addIncomplete"); assert(!StubEnc.empty() && "Passing an empty string to addIncomplete()"); E.Swapped.swap(E.Str); // swap out the Recursive E.Str.swap(StubEnc); E.State = Incomplete; ++IncompleteCount; } /// Once the RecordType has been expanded, the temporary incomplete TypeString /// must be removed from the cache. /// If a Recursive was swapped out by addIncomplete(), it will be replaced. /// Returns true if the RecordType was defined recursively. 
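// Worked example of the Incomplete/Recursive dance (a reading of the code in
// this section, shown here for illustration only). For the C type
//
//   struct Node { struct Node *next; int v; };
//
// addIncomplete() first caches the stub "s(Node){}". While the members are
// being expanded, the 'next' field reaches Node again, lookupStr() hands back
// the stub and marks it IncompleteUsed, so the field encodes as
// "m(next){p(s(Node){})}". removeIncomplete() below therefore returns true,
// and the completed encoding "s(Node){m(next){p(s(Node){})},m(v){si}}" is
// cached as Recursive by addIfComplete().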
bool TypeStringCache::removeIncomplete(const IdentifierInfo *ID) {
  if (!ID)
    return false;
  auto I = Map.find(ID);
  assert(I != Map.end() && "Entry not present");
  Entry &E = I->second;
  assert( (E.State == Incomplete ||
           E.State == IncompleteUsed) &&
         "Entry must be an incomplete type");
  bool IsRecursive = false;
  if (E.State == IncompleteUsed) {
    // We made use of our Incomplete encoding, thus we are recursive.
    IsRecursive = true;
    --IncompleteUsedCount;
  }
  if (E.Swapped.empty())
    Map.erase(I);
  else {
    // Swap the Recursive back.
    E.Swapped.swap(E.Str);
    E.Swapped.clear();
    E.State = Recursive;
  }
  --IncompleteCount;
  return IsRecursive;
}

/// Add the encoded TypeString to the cache only if it is NonRecursive or
/// Recursive (viz: all sub-members were expanded as fully as possible).
void TypeStringCache::addIfComplete(const IdentifierInfo *ID, StringRef Str,
                                    bool IsRecursive) {
  if (!ID || IncompleteUsedCount)
    return; // No key or it is an incomplete sub-type so don't add.
  Entry &E = Map[ID];
  if (IsRecursive && !E.Str.empty()) {
    assert(E.State==Recursive && E.Str.size() == Str.size() &&
           "This is not the same Recursive entry");
    // The parent container was not recursive after all, so we could have used
    // this Recursive sub-member entry after all, but we assumed the worst when
    // we started viz: IncompleteCount!=0.
    return;
  }
  assert(E.Str.empty() && "Entry already present");
  E.Str = Str.str();
  E.State = IsRecursive ? Recursive : NonRecursive;
}

/// Return a cached TypeString encoding for the ID. If there isn't one, or we
/// are recursively expanding a type (IncompleteCount != 0) and the cached
/// encoding is Recursive, return an empty StringRef.
StringRef TypeStringCache::lookupStr(const IdentifierInfo *ID) {
  if (!ID)
    return StringRef(); // We have no key.
  auto I = Map.find(ID);
  if (I == Map.end())
    return StringRef(); // We have no encoding.
  Entry &E = I->second;
  if (E.State == Recursive && IncompleteCount)
    return StringRef(); // We don't use Recursive encodings for member types.
  if (E.State == Incomplete) {
    // The incomplete type is being used to break out of recursion.
    E.State = IncompleteUsed;
    ++IncompleteUsedCount;
  }
  return E.Str;
}

/// The XCore ABI includes a type information section that communicates symbol
/// type information to the linker. The linker uses this information to verify
/// safety/correctness of things such as array bounds and pointers et al.
/// The ABI only requires C (and XC) language modules to emit TypeStrings.
/// This type information (TypeString) is emitted into metadata for all global
/// symbols: definitions, declarations, functions & variables.
///
/// The TypeString carries type, qualifier, name, size & value details.
/// Please see 'Tools Development Guide' section 2.16.2 for format details:
/// https://www.xmos.com/download/public/Tools-Development-Guide%28X9114A%29.pdf
/// The output is tested by test/CodeGen/xcore-stringtype.c.
///
static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
                          const CodeGen::CodeGenModule &CGM,
                          TypeStringCache &TSC);

/// XCore uses emitTargetMD to emit TypeString metadata for global symbols.
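// Example of the emitted form (illustrative; assumes a C-linkage declaration
// 'void f(int);' and the typed-pointer IR syntax of this LLVM version): the
// helpers later in this file encode the function type as "f{0}(si)", and
// emitTargetMD() below attaches it to the module roughly as
//
//   !xcore.typestrings = !{!0}
//   !0 = !{void (i32)* @f, !"f{0}(si)"}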
void XCoreTargetCodeGenInfo::emitTargetMD( const Decl *D, llvm::GlobalValue *GV, const CodeGen::CodeGenModule &CGM) const { SmallStringEnc Enc; if (getTypeString(Enc, D, CGM, TSC)) { llvm::LLVMContext &Ctx = CGM.getModule().getContext(); llvm::Metadata *MDVals[] = {llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Enc.str())}; llvm::NamedMDNode *MD = CGM.getModule().getOrInsertNamedMetadata("xcore.typestrings"); MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); } } void XCoreTargetCodeGenInfo::emitTargetMetadata( CodeGen::CodeGenModule &CGM, const llvm::MapVector &MangledDeclNames) const { // Warning, new MangledDeclNames may be appended within this loop. // We rely on MapVector insertions adding new elements to the end // of the container. for (unsigned I = 0; I != MangledDeclNames.size(); ++I) { auto Val = *(MangledDeclNames.begin() + I); llvm::GlobalValue *GV = CGM.GetGlobalValue(Val.second); if (GV) { const Decl *D = Val.first.getDecl()->getMostRecentDecl(); emitTargetMD(D, GV, CGM); } } } //===----------------------------------------------------------------------===// // SPIR ABI Implementation //===----------------------------------------------------------------------===// namespace { class SPIRTargetCodeGenInfo : public TargetCodeGenInfo { public: SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(std::make_unique(CGT)) {} unsigned getOpenCLKernelCallingConv() const override; }; } // End anonymous namespace. namespace clang { namespace CodeGen { void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) { DefaultABIInfo SPIRABI(CGM.getTypes()); SPIRABI.computeInfo(FI); } } } unsigned SPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const { return llvm::CallingConv::SPIR_KERNEL; } static bool appendType(SmallStringEnc &Enc, QualType QType, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC); /// Helper function for appendRecordType(). /// Builds a SmallVector containing the encoded field types in declaration /// order. static bool extractFieldType(SmallVectorImpl &FE, const RecordDecl *RD, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) { for (const auto *Field : RD->fields()) { SmallStringEnc Enc; Enc += "m("; Enc += Field->getName(); Enc += "){"; if (Field->isBitField()) { Enc += "b("; llvm::raw_svector_ostream OS(Enc); OS << Field->getBitWidthValue(CGM.getContext()); Enc += ':'; } if (!appendType(Enc, Field->getType(), CGM, TSC)) return false; if (Field->isBitField()) Enc += ')'; Enc += '}'; FE.emplace_back(!Field->getName().empty(), Enc); } return true; } /// Appends structure and union types to Enc and adds encoding to cache. /// Recursively calls appendType (via extractFieldType) for each field. /// Union types have their fields ordered according to the ABI. static bool appendRecordType(SmallStringEnc &Enc, const RecordType *RT, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC, const IdentifierInfo *ID) { // Append the cached TypeString if we have one. StringRef TypeString = TSC.lookupStr(ID); if (!TypeString.empty()) { Enc += TypeString; return true; } // Start to emit an incomplete TypeString. size_t Start = Enc.size(); Enc += (RT->isUnionType()? 'u' : 's'); Enc += '('; if (ID) Enc += ID->getName(); Enc += "){"; // We collect all encoded fields and order as necessary. 
bool IsRecursive = false; const RecordDecl *RD = RT->getDecl()->getDefinition(); if (RD && !RD->field_empty()) { // An incomplete TypeString stub is placed in the cache for this RecordType // so that recursive calls to this RecordType will use it whilst building a // complete TypeString for this RecordType. SmallVector FE; std::string StubEnc(Enc.substr(Start).str()); StubEnc += '}'; // StubEnc now holds a valid incomplete TypeString. TSC.addIncomplete(ID, std::move(StubEnc)); if (!extractFieldType(FE, RD, CGM, TSC)) { (void) TSC.removeIncomplete(ID); return false; } IsRecursive = TSC.removeIncomplete(ID); // The ABI requires unions to be sorted but not structures. // See FieldEncoding::operator< for sort algorithm. if (RT->isUnionType()) llvm::sort(FE); // We can now complete the TypeString. unsigned E = FE.size(); for (unsigned I = 0; I != E; ++I) { if (I) Enc += ','; Enc += FE[I].str(); } } Enc += '}'; TSC.addIfComplete(ID, Enc.substr(Start), IsRecursive); return true; } /// Appends enum types to Enc and adds the encoding to the cache. static bool appendEnumType(SmallStringEnc &Enc, const EnumType *ET, TypeStringCache &TSC, const IdentifierInfo *ID) { // Append the cached TypeString if we have one. StringRef TypeString = TSC.lookupStr(ID); if (!TypeString.empty()) { Enc += TypeString; return true; } size_t Start = Enc.size(); Enc += "e("; if (ID) Enc += ID->getName(); Enc += "){"; // We collect all encoded enumerations and order them alphanumerically. if (const EnumDecl *ED = ET->getDecl()->getDefinition()) { SmallVector FE; for (auto I = ED->enumerator_begin(), E = ED->enumerator_end(); I != E; ++I) { SmallStringEnc EnumEnc; EnumEnc += "m("; EnumEnc += I->getName(); EnumEnc += "){"; I->getInitVal().toString(EnumEnc); EnumEnc += '}'; FE.push_back(FieldEncoding(!I->getName().empty(), EnumEnc)); } llvm::sort(FE); unsigned E = FE.size(); for (unsigned I = 0; I != E; ++I) { if (I) Enc += ','; Enc += FE[I].str(); } } Enc += '}'; TSC.addIfComplete(ID, Enc.substr(Start), false); return true; } /// Appends type's qualifier to Enc. /// This is done prior to appending the type's encoding. static void appendQualifier(SmallStringEnc &Enc, QualType QT) { // Qualifiers are emitted in alphabetical order. static const char *const Table[]={"","c:","r:","cr:","v:","cv:","rv:","crv:"}; int Lookup = 0; if (QT.isConstQualified()) Lookup += 1<<0; if (QT.isRestrictQualified()) Lookup += 1<<1; if (QT.isVolatileQualified()) Lookup += 1<<2; Enc += Table[Lookup]; } /// Appends built-in types to Enc. 
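// For example (a reading of appendQualifier() above and the table below, not
// separate ABI documentation): 'const unsigned int' encodes as "c:ui" and
// 'volatile long' as "v:sl"; the qualifier prefix comes from the table in
// appendQualifier() and the type suffix from appendBuiltinType().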
static bool appendBuiltinType(SmallStringEnc &Enc, const BuiltinType *BT) { const char *EncType; switch (BT->getKind()) { case BuiltinType::Void: EncType = "0"; break; case BuiltinType::Bool: EncType = "b"; break; case BuiltinType::Char_U: EncType = "uc"; break; case BuiltinType::UChar: EncType = "uc"; break; case BuiltinType::SChar: EncType = "sc"; break; case BuiltinType::UShort: EncType = "us"; break; case BuiltinType::Short: EncType = "ss"; break; case BuiltinType::UInt: EncType = "ui"; break; case BuiltinType::Int: EncType = "si"; break; case BuiltinType::ULong: EncType = "ul"; break; case BuiltinType::Long: EncType = "sl"; break; case BuiltinType::ULongLong: EncType = "ull"; break; case BuiltinType::LongLong: EncType = "sll"; break; case BuiltinType::Float: EncType = "ft"; break; case BuiltinType::Double: EncType = "d"; break; case BuiltinType::LongDouble: EncType = "ld"; break; default: return false; } Enc += EncType; return true; } /// Appends a pointer encoding to Enc before calling appendType for the pointee. static bool appendPointerType(SmallStringEnc &Enc, const PointerType *PT, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) { Enc += "p("; if (!appendType(Enc, PT->getPointeeType(), CGM, TSC)) return false; Enc += ')'; return true; } /// Appends array encoding to Enc before calling appendType for the element. static bool appendArrayType(SmallStringEnc &Enc, QualType QT, const ArrayType *AT, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC, StringRef NoSizeEnc) { if (AT->getSizeModifier() != ArrayType::Normal) return false; Enc += "a("; if (const ConstantArrayType *CAT = dyn_cast(AT)) CAT->getSize().toStringUnsigned(Enc); else Enc += NoSizeEnc; // Global arrays use "*", otherwise it is "". Enc += ':'; // The Qualifiers should be attached to the type rather than the array. appendQualifier(Enc, QT); if (!appendType(Enc, AT->getElementType(), CGM, TSC)) return false; Enc += ')'; return true; } /// Appends a function encoding to Enc, calling appendType for the return type /// and the arguments. static bool appendFunctionType(SmallStringEnc &Enc, const FunctionType *FT, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) { Enc += "f{"; if (!appendType(Enc, FT->getReturnType(), CGM, TSC)) return false; Enc += "}("; if (const FunctionProtoType *FPT = FT->getAs()) { // N.B. we are only interested in the adjusted param types. auto I = FPT->param_type_begin(); auto E = FPT->param_type_end(); if (I != E) { do { if (!appendType(Enc, *I, CGM, TSC)) return false; ++I; if (I != E) Enc += ','; } while (I != E); if (FPT->isVariadic()) Enc += ",va"; } else { if (FPT->isVariadic()) Enc += "va"; else Enc += '0'; } } Enc += ')'; return true; } /// Handles the type's qualifier before dispatching a call to handle specific /// type encodings. static bool appendType(SmallStringEnc &Enc, QualType QType, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) { QualType QT = QType.getCanonicalType(); if (const ArrayType *AT = QT->getAsArrayTypeUnsafe()) // The Qualifiers should be attached to the type rather than the array. // Thus we don't call appendQualifier() here. 
return appendArrayType(Enc, QT, AT, CGM, TSC, ""); appendQualifier(Enc, QT); if (const BuiltinType *BT = QT->getAs()) return appendBuiltinType(Enc, BT); if (const PointerType *PT = QT->getAs()) return appendPointerType(Enc, PT, CGM, TSC); if (const EnumType *ET = QT->getAs()) return appendEnumType(Enc, ET, TSC, QT.getBaseTypeIdentifier()); if (const RecordType *RT = QT->getAsStructureType()) return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier()); if (const RecordType *RT = QT->getAsUnionType()) return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier()); if (const FunctionType *FT = QT->getAs()) return appendFunctionType(Enc, FT, CGM, TSC); return false; } static bool getTypeString(SmallStringEnc &Enc, const Decl *D, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) { if (!D) return false; if (const FunctionDecl *FD = dyn_cast(D)) { if (FD->getLanguageLinkage() != CLanguageLinkage) return false; return appendType(Enc, FD->getType(), CGM, TSC); } if (const VarDecl *VD = dyn_cast(D)) { if (VD->getLanguageLinkage() != CLanguageLinkage) return false; QualType QT = VD->getType().getCanonicalType(); if (const ArrayType *AT = QT->getAsArrayTypeUnsafe()) { // Global ArrayTypes are given a size of '*' if the size is unknown. // The Qualifiers should be attached to the type rather than the array. // Thus we don't call appendQualifier() here. return appendArrayType(Enc, QT, AT, CGM, TSC, "*"); } return appendType(Enc, QT, CGM, TSC); } return false; } //===----------------------------------------------------------------------===// // RISCV ABI Implementation //===----------------------------------------------------------------------===// namespace { class RISCVABIInfo : public DefaultABIInfo { private: // Size of the integer ('x') registers in bits. unsigned XLen; // Size of the floating point ('f') registers in bits. Note that the target // ISA might have a wider FLen than the selected ABI (e.g. an RV32IF target // with soft float ABI has FLen==0). unsigned FLen; static const int NumArgGPRs = 8; static const int NumArgFPRs = 8; bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, llvm::Type *&Field1Ty, CharUnits &Field1Off, llvm::Type *&Field2Ty, CharUnits &Field2Off) const; public: RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen) : DefaultABIInfo(CGT), XLen(XLen), FLen(FLen) {} // DefaultABIInfo's classifyReturnType and classifyArgumentType are // non-virtual, but computeInfo is virtual, so we overload it. 
void computeInfo(CGFunctionInfo &FI) const override; ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft, int &ArgFPRsLeft) const; ABIArgInfo classifyReturnType(QualType RetTy) const; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; ABIArgInfo extendType(QualType Ty) const; bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, CharUnits &Field1Off, llvm::Type *&Field2Ty, CharUnits &Field2Off, int &NeededArgGPRs, int &NeededArgFPRs) const; ABIArgInfo coerceAndExpandFPCCEligibleStruct(llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty, CharUnits Field2Off) const; }; } // end anonymous namespace void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const { QualType RetTy = FI.getReturnType(); if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(RetTy); // IsRetIndirect is true if classifyArgumentType indicated the value should // be passed indirect, or if the type size is a scalar greater than 2*XLen // and not a complex type with elements <= FLen. e.g. fp128 is passed direct // in LLVM IR, relying on the backend lowering code to rewrite the argument // list and pass indirectly on RV32. bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect; if (!IsRetIndirect && RetTy->isScalarType() && getContext().getTypeSize(RetTy) > (2 * XLen)) { if (RetTy->isComplexType() && FLen) { QualType EltTy = RetTy->getAs()->getElementType(); IsRetIndirect = getContext().getTypeSize(EltTy) > FLen; } else { // This is a normal scalar > 2*XLen, such as fp128 on RV32. IsRetIndirect = true; } } // We must track the number of GPRs used in order to conform to the RISC-V // ABI, as integer scalars passed in registers should have signext/zeroext // when promoted, but are anyext if passed on the stack. As GPR usage is // different for variadic arguments, we must also track whether we are // examining a vararg or not. int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; int ArgFPRsLeft = FLen ? NumArgFPRs : 0; int NumFixedArgs = FI.getNumRequiredArgs(); int ArgNum = 0; for (auto &ArgInfo : FI.arguments()) { bool IsFixed = ArgNum < NumFixedArgs; ArgInfo.info = classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft); ArgNum++; } } // Returns true if the struct is a potential candidate for the floating point // calling convention. If this function returns true, the caller is // responsible for checking that if there is only a single field then that // field is a float. bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, llvm::Type *&Field1Ty, CharUnits &Field1Off, llvm::Type *&Field2Ty, CharUnits &Field2Off) const { bool IsInt = Ty->isIntegralOrEnumerationType(); bool IsFloat = Ty->isRealFloatingType(); if (IsInt || IsFloat) { uint64_t Size = getContext().getTypeSize(Ty); if (IsInt && Size > XLen) return false; // Can't be eligible if larger than the FP registers. Half precision isn't // currently supported on RISC-V and the ABI hasn't been confirmed, so // default to the integer ABI in that case. if (IsFloat && (Size > FLen || Size < 32)) return false; // Can't be eligible if an integer type was already found (int+int pairs // are not eligible). 
if (IsInt && Field1Ty && Field1Ty->isIntegerTy()) return false; if (!Field1Ty) { Field1Ty = CGT.ConvertType(Ty); Field1Off = CurOff; return true; } if (!Field2Ty) { Field2Ty = CGT.ConvertType(Ty); Field2Off = CurOff; return true; } return false; } if (auto CTy = Ty->getAs()) { if (Field1Ty) return false; QualType EltTy = CTy->getElementType(); if (getContext().getTypeSize(EltTy) > FLen) return false; Field1Ty = CGT.ConvertType(EltTy); Field1Off = CurOff; assert(CurOff.isZero() && "Unexpected offset for first field"); Field2Ty = Field1Ty; Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy); return true; } if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { uint64_t ArraySize = ATy->getSize().getZExtValue(); QualType EltTy = ATy->getElementType(); CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); for (uint64_t i = 0; i < ArraySize; ++i) { bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty, Field1Off, Field2Ty, Field2Off); if (!Ret) return false; CurOff += EltSize; } return true; } if (const auto *RTy = Ty->getAs()) { // Structures with either a non-trivial destructor or a non-trivial // copy constructor are not eligible for the FP calling convention. if (getRecordArgABI(Ty, CGT.getCXXABI())) return false; if (isEmptyRecord(getContext(), Ty, true)) return true; const RecordDecl *RD = RTy->getDecl(); // Unions aren't eligible unless they're empty (which is caught above). if (RD->isUnion()) return false; int ZeroWidthBitFieldCount = 0; for (const FieldDecl *FD : RD->fields()) { const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex()); QualType QTy = FD->getType(); if (FD->isBitField()) { unsigned BitWidth = FD->getBitWidthValue(getContext()); // Allow a bitfield with a type greater than XLen as long as the // bitwidth is XLen or less. if (getContext().getTypeSize(QTy) > XLen && BitWidth <= XLen) QTy = getContext().getIntTypeForBitwidth(XLen, false); if (BitWidth == 0) { ZeroWidthBitFieldCount++; continue; } } bool Ret = detectFPCCEligibleStructHelper( QTy, CurOff + getContext().toCharUnitsFromBits(FieldOffInBits), Field1Ty, Field1Off, Field2Ty, Field2Off); if (!Ret) return false; // As a quirk of the ABI, zero-width bitfields aren't ignored for fp+fp // or int+fp structs, but are ignored for a struct with an fp field and // any number of zero-width bitfields. if (Field2Ty && ZeroWidthBitFieldCount > 0) return false; } return Field1Ty != nullptr; } return false; } // Determine if a struct is eligible for passing according to the floating // point calling convention (i.e., when flattened it contains a single fp // value, fp+fp, or int+fp of appropriate size). If so, NeededArgFPRs and // NeededArgGPRs are incremented appropriately. bool RISCVABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, CharUnits &Field1Off, llvm::Type *&Field2Ty, CharUnits &Field2Off, int &NeededArgGPRs, int &NeededArgFPRs) const { Field1Ty = nullptr; Field2Ty = nullptr; NeededArgGPRs = 0; NeededArgFPRs = 0; bool IsCandidate = detectFPCCEligibleStructHelper( Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off); // Not really a candidate if we have a single int but no float. 
if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) return false; if (!IsCandidate) return false; if (Field1Ty && Field1Ty->isFloatingPointTy()) NeededArgFPRs++; else if (Field1Ty) NeededArgGPRs++; if (Field2Ty && Field2Ty->isFloatingPointTy()) NeededArgFPRs++; else if (Field2Ty) NeededArgGPRs++; return IsCandidate; } // Call getCoerceAndExpand for the two-element flattened struct described by // Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an // appropriate coerceToType and unpaddedCoerceToType. ABIArgInfo RISCVABIInfo::coerceAndExpandFPCCEligibleStruct( llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty, CharUnits Field2Off) const { SmallVector CoerceElts; SmallVector UnpaddedCoerceElts; if (!Field1Off.isZero()) CoerceElts.push_back(llvm::ArrayType::get( llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity())); CoerceElts.push_back(Field1Ty); UnpaddedCoerceElts.push_back(Field1Ty); if (!Field2Ty) { return ABIArgInfo::getCoerceAndExpand( llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()), UnpaddedCoerceElts[0]); } CharUnits Field2Align = CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(Field2Ty)); CharUnits Field1Size = CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty)); CharUnits Field2OffNoPadNoPack = Field1Size.alignTo(Field2Align); CharUnits Padding = CharUnits::Zero(); if (Field2Off > Field2OffNoPadNoPack) Padding = Field2Off - Field2OffNoPadNoPack; else if (Field2Off != Field2Align && Field2Off > Field1Size) Padding = Field2Off - Field1Size; bool IsPacked = !Field2Off.isMultipleOf(Field2Align); if (!Padding.isZero()) CoerceElts.push_back(llvm::ArrayType::get( llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity())); CoerceElts.push_back(Field2Ty); UnpaddedCoerceElts.push_back(Field2Ty); auto CoerceToType = llvm::StructType::get(getVMContext(), CoerceElts, IsPacked); auto UnpaddedCoerceToType = llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked); return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType); } ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft, int &ArgFPRsLeft) const { assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow"); Ty = useFirstFieldIfTransparentUnion(Ty); // Structures with either a non-trivial destructor or a non-trivial // copy constructor are always passed indirectly. if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { if (ArgGPRsLeft) ArgGPRsLeft -= 1; return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA == CGCXXABI::RAA_DirectInMemory); } // Ignore empty structs/unions. if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); uint64_t Size = getContext().getTypeSize(Ty); // Pass floating point values via FPRs if possible. if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() && FLen >= Size && ArgFPRsLeft) { ArgFPRsLeft--; return ABIArgInfo::getDirect(); } // Complex types for the hard float ABI must be passed direct rather than // using CoerceAndExpand. 
if (IsFixed && Ty->isComplexType() && FLen && ArgFPRsLeft >= 2) { QualType EltTy = Ty->castAs()->getElementType(); if (getContext().getTypeSize(EltTy) <= FLen) { ArgFPRsLeft -= 2; return ABIArgInfo::getDirect(); } } if (IsFixed && FLen && Ty->isStructureOrClassType()) { llvm::Type *Field1Ty = nullptr; llvm::Type *Field2Ty = nullptr; CharUnits Field1Off = CharUnits::Zero(); CharUnits Field2Off = CharUnits::Zero(); int NeededArgGPRs; int NeededArgFPRs; bool IsCandidate = detectFPCCEligibleStruct(Ty, Field1Ty, Field1Off, Field2Ty, Field2Off, NeededArgGPRs, NeededArgFPRs); if (IsCandidate && NeededArgGPRs <= ArgGPRsLeft && NeededArgFPRs <= ArgFPRsLeft) { ArgGPRsLeft -= NeededArgGPRs; ArgFPRsLeft -= NeededArgFPRs; return coerceAndExpandFPCCEligibleStruct(Field1Ty, Field1Off, Field2Ty, Field2Off); } } uint64_t NeededAlign = getContext().getTypeAlign(Ty); bool MustUseStack = false; // Determine the number of GPRs needed to pass the current argument // according to the ABI. 2*XLen-aligned varargs are passed in "aligned" // register pairs, so may consume 3 registers. int NeededArgGPRs = 1; if (!IsFixed && NeededAlign == 2 * XLen) NeededArgGPRs = 2 + (ArgGPRsLeft % 2); else if (Size > XLen && Size <= 2 * XLen) NeededArgGPRs = 2; if (NeededArgGPRs > ArgGPRsLeft) { MustUseStack = true; NeededArgGPRs = ArgGPRsLeft; } ArgGPRsLeft -= NeededArgGPRs; if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) Ty = EnumTy->getDecl()->getIntegerType(); // All integral types are promoted to XLen width, unless passed on the // stack. if (Size < XLen && Ty->isIntegralOrEnumerationType() && !MustUseStack) { return extendType(Ty); } if (const auto *EIT = Ty->getAs()) { if (EIT->getNumBits() < XLen && !MustUseStack) return extendType(Ty); if (EIT->getNumBits() > 128 || (!getContext().getTargetInfo().hasInt128Type() && EIT->getNumBits() > 64)) return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } return ABIArgInfo::getDirect(); } // Aggregates which are <= 2*XLen will be passed in registers if possible, // so coerce to integers. if (Size <= 2 * XLen) { unsigned Alignment = getContext().getTypeAlign(Ty); // Use a single XLen int if possible, 2*XLen if 2*XLen alignment is // required, and a 2-element XLen array if only XLen alignment is required. if (Size <= XLen) { return ABIArgInfo::getDirect( llvm::IntegerType::get(getVMContext(), XLen)); } else if (Alignment == 2 * XLen) { return ABIArgInfo::getDirect( llvm::IntegerType::get(getVMContext(), 2 * XLen)); } else { return ABIArgInfo::getDirect(llvm::ArrayType::get( llvm::IntegerType::get(getVMContext(), XLen), 2)); } } return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } ABIArgInfo RISCVABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); int ArgGPRsLeft = 2; int ArgFPRsLeft = FLen ? 2 : 0; // The rules for return and argument types are the same, so defer to // classifyArgumentType. return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft, ArgFPRsLeft); } Address RISCVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { CharUnits SlotSize = CharUnits::fromQuantity(XLen / 8); // Empty records are ignored for parameter passing purposes. 
if (isEmptyRecord(getContext(), Ty, true)) { Address Addr(CGF.Builder.CreateLoad(VAListAddr), SlotSize); Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty)); return Addr; } std::pair SizeAndAlign = getContext().getTypeInfoInChars(Ty); // Arguments bigger than 2*Xlen bytes are passed indirectly. bool IsIndirect = SizeAndAlign.first > 2 * SlotSize; return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, SizeAndAlign, SlotSize, /*AllowHigherAlign=*/true); } ABIArgInfo RISCVABIInfo::extendType(QualType Ty) const { int TySize = getContext().getTypeSize(Ty); // RV64 ABI requires unsigned 32 bit integers to be sign extended. if (XLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) return ABIArgInfo::getSignExtend(Ty); return ABIArgInfo::getExtend(Ty); } namespace { class RISCVTargetCodeGenInfo : public TargetCodeGenInfo { public: RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen) : TargetCodeGenInfo(std::make_unique(CGT, XLen, FLen)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { const auto *FD = dyn_cast_or_null(D); if (!FD) return; const auto *Attr = FD->getAttr(); if (!Attr) return; const char *Kind; switch (Attr->getInterrupt()) { case RISCVInterruptAttr::user: Kind = "user"; break; case RISCVInterruptAttr::supervisor: Kind = "supervisor"; break; case RISCVInterruptAttr::machine: Kind = "machine"; break; } auto *Fn = cast(GV); Fn->addFnAttr("interrupt", Kind); } }; } // namespace //===----------------------------------------------------------------------===// // VE ABI Implementation. // namespace { class VEABIInfo : public DefaultABIInfo { public: VEABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} private: ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy) const; void computeInfo(CGFunctionInfo &FI) const override; }; } // end anonymous namespace ABIArgInfo VEABIInfo::classifyReturnType(QualType Ty) const { if (Ty->isAnyComplexType()) return ABIArgInfo::getDirect(); uint64_t Size = getContext().getTypeSize(Ty); if (Size < 64 && Ty->isIntegerType()) return ABIArgInfo::getExtend(Ty); return DefaultABIInfo::classifyReturnType(Ty); } ABIArgInfo VEABIInfo::classifyArgumentType(QualType Ty) const { if (Ty->isAnyComplexType()) return ABIArgInfo::getDirect(); uint64_t Size = getContext().getTypeSize(Ty); if (Size < 64 && Ty->isIntegerType()) return ABIArgInfo::getExtend(Ty); return DefaultABIInfo::classifyArgumentType(Ty); } void VEABIInfo::computeInfo(CGFunctionInfo &FI) const { FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &Arg : FI.arguments()) Arg.info = classifyArgumentType(Arg.type); } namespace { class VETargetCodeGenInfo : public TargetCodeGenInfo { public: VETargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(std::make_unique(CGT)) {} // VE ABI requires the arguments of variadic and prototype-less functions // are passed in both registers and memory. 
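// In other words (a reading of the override below, not a full statement of
// the VE ABI): a call through an unprototyped function, e.g.
//
//   int (*fp)();
//   ... fp(1, 2.5); ...
//
// must pass its arguments both in registers and in the memory argument area,
// just like a variadic call, so returning true here makes Clang use the
// variadic convention for every no-prototype call.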
bool isNoProtoCallVariadic(const CallArgList &args, const FunctionNoProtoType *fnType) const override { return true; } }; } // end anonymous namespace //===----------------------------------------------------------------------===// // Driver code //===----------------------------------------------------------------------===// bool CodeGenModule::supportsCOMDAT() const { return getTriple().supportsCOMDAT(); } const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { if (TheTargetCodeGenInfo) return *TheTargetCodeGenInfo; // Helper to set the unique_ptr while still keeping the return value. auto SetCGInfo = [&](TargetCodeGenInfo *P) -> const TargetCodeGenInfo & { this->TheTargetCodeGenInfo.reset(P); return *P; }; const llvm::Triple &Triple = getTarget().getTriple(); switch (Triple.getArch()) { default: return SetCGInfo(new DefaultTargetCodeGenInfo(Types)); case llvm::Triple::le32: return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); case llvm::Triple::mips: case llvm::Triple::mipsel: if (Triple.getOS() == llvm::Triple::NaCl) return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); return SetCGInfo(new MIPSTargetCodeGenInfo(Types, true)); case llvm::Triple::mips64: case llvm::Triple::mips64el: return SetCGInfo(new MIPSTargetCodeGenInfo(Types, false)); case llvm::Triple::avr: return SetCGInfo(new AVRTargetCodeGenInfo(Types)); case llvm::Triple::aarch64: case llvm::Triple::aarch64_32: case llvm::Triple::aarch64_be: { AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS; if (getTarget().getABI() == "darwinpcs") Kind = AArch64ABIInfo::DarwinPCS; else if (Triple.isOSWindows()) return SetCGInfo( new WindowsAArch64TargetCodeGenInfo(Types, AArch64ABIInfo::Win64)); return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind)); } case llvm::Triple::wasm32: case llvm::Triple::wasm64: { WebAssemblyABIInfo::ABIKind Kind = WebAssemblyABIInfo::MVP; if (getTarget().getABI() == "experimental-mv") Kind = WebAssemblyABIInfo::ExperimentalMV; return SetCGInfo(new WebAssemblyTargetCodeGenInfo(Types, Kind)); } case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: { if (Triple.getOS() == llvm::Triple::Win32) { return SetCGInfo( new WindowsARMTargetCodeGenInfo(Types, ARMABIInfo::AAPCS_VFP)); } ARMABIInfo::ABIKind Kind = ARMABIInfo::AAPCS; StringRef ABIStr = getTarget().getABI(); if (ABIStr == "apcs-gnu") Kind = ARMABIInfo::APCS; else if (ABIStr == "aapcs16") Kind = ARMABIInfo::AAPCS16_VFP; else if (CodeGenOpts.FloatABI == "hard" || (CodeGenOpts.FloatABI != "soft" && (Triple.getEnvironment() == llvm::Triple::GNUEABIHF || Triple.getEnvironment() == llvm::Triple::MuslEABIHF || Triple.getEnvironment() == llvm::Triple::EABIHF))) Kind = ARMABIInfo::AAPCS_VFP; return SetCGInfo(new ARMTargetCodeGenInfo(Types, Kind)); } case llvm::Triple::ppc: { if (Triple.isOSAIX()) return SetCGInfo(new AIXTargetCodeGenInfo(Types, /*Is64Bit*/ false)); bool IsSoftFloat = CodeGenOpts.FloatABI == "soft" || getTarget().hasFeature("spe"); bool RetSmallStructInRegABI = PPC32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts); return SetCGInfo( new PPC32TargetCodeGenInfo(Types, IsSoftFloat, RetSmallStructInRegABI)); } case llvm::Triple::ppc64: if (Triple.isOSAIX()) return SetCGInfo(new AIXTargetCodeGenInfo(Types, /*Is64Bit*/ true)); if (Triple.isOSBinFormatELF()) { PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1; if (getTarget().getABI() == "elfv2") Kind = PPC64_SVR4_ABIInfo::ELFv2; bool HasQPX = getTarget().getABI() == "elfv1-qpx"; bool IsSoftFloat = CodeGenOpts.FloatABI == 
"soft"; return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX, IsSoftFloat)); } return SetCGInfo(new PPC64TargetCodeGenInfo(Types)); case llvm::Triple::ppc64le: { assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!"); PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv2; if (getTarget().getABI() == "elfv1" || getTarget().getABI() == "elfv1-qpx") Kind = PPC64_SVR4_ABIInfo::ELFv1; bool HasQPX = getTarget().getABI() == "elfv1-qpx"; bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX, IsSoftFloat)); } case llvm::Triple::nvptx: case llvm::Triple::nvptx64: return SetCGInfo(new NVPTXTargetCodeGenInfo(Types)); case llvm::Triple::msp430: return SetCGInfo(new MSP430TargetCodeGenInfo(Types)); case llvm::Triple::riscv32: case llvm::Triple::riscv64: { StringRef ABIStr = getTarget().getABI(); unsigned XLen = getTarget().getPointerWidth(0); unsigned ABIFLen = 0; if (ABIStr.endswith("f")) ABIFLen = 32; else if (ABIStr.endswith("d")) ABIFLen = 64; return SetCGInfo(new RISCVTargetCodeGenInfo(Types, XLen, ABIFLen)); } case llvm::Triple::systemz: { bool SoftFloat = CodeGenOpts.FloatABI == "soft"; bool HasVector = !SoftFloat && getTarget().getABI() == "vector"; return SetCGInfo(new SystemZTargetCodeGenInfo(Types, HasVector, SoftFloat)); } case llvm::Triple::tce: case llvm::Triple::tcele: return SetCGInfo(new TCETargetCodeGenInfo(Types)); case llvm::Triple::x86: { bool IsDarwinVectorABI = Triple.isOSDarwin(); bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts); bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing(); if (Triple.getOS() == llvm::Triple::Win32) { return SetCGInfo(new WinX86_32TargetCodeGenInfo( Types, IsDarwinVectorABI, RetSmallStructInRegABI, IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters)); } else { return SetCGInfo(new X86_32TargetCodeGenInfo( Types, IsDarwinVectorABI, RetSmallStructInRegABI, IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters, CodeGenOpts.FloatABI == "soft")); } } case llvm::Triple::x86_64: { StringRef ABI = getTarget().getABI(); X86AVXABILevel AVXLevel = (ABI == "avx512" ? X86AVXABILevel::AVX512 : ABI == "avx" ? X86AVXABILevel::AVX : X86AVXABILevel::None); switch (Triple.getOS()) { case llvm::Triple::Win32: return SetCGInfo(new WinX86_64TargetCodeGenInfo(Types, AVXLevel)); default: return SetCGInfo(new X86_64TargetCodeGenInfo(Types, AVXLevel)); } } case llvm::Triple::hexagon: return SetCGInfo(new HexagonTargetCodeGenInfo(Types)); case llvm::Triple::lanai: return SetCGInfo(new LanaiTargetCodeGenInfo(Types)); case llvm::Triple::r600: return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types)); case llvm::Triple::amdgcn: return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types)); case llvm::Triple::sparc: return SetCGInfo(new SparcV8TargetCodeGenInfo(Types)); case llvm::Triple::sparcv9: return SetCGInfo(new SparcV9TargetCodeGenInfo(Types)); case llvm::Triple::xcore: return SetCGInfo(new XCoreTargetCodeGenInfo(Types)); case llvm::Triple::arc: return SetCGInfo(new ARCTargetCodeGenInfo(Types)); case llvm::Triple::spir: case llvm::Triple::spir64: return SetCGInfo(new SPIRTargetCodeGenInfo(Types)); case llvm::Triple::ve: return SetCGInfo(new VETargetCodeGenInfo(Types)); } } /// Create an OpenCL kernel for an enqueued block. /// /// The kernel has the same function type as the block invoke function. Its /// name is the name of the block invoke function postfixed with "_kernel". 
/// It simply calls the block invoke function then returns. llvm::Function * TargetCodeGenInfo::createEnqueuedBlockKernel(CodeGenFunction &CGF, llvm::Function *Invoke, llvm::Value *BlockLiteral) const { auto *InvokeFT = Invoke->getFunctionType(); llvm::SmallVector ArgTys; for (auto &P : InvokeFT->params()) ArgTys.push_back(P); auto &C = CGF.getLLVMContext(); std::string Name = Invoke->getName().str() + "_kernel"; auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false); auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name, &CGF.CGM.getModule()); auto IP = CGF.Builder.saveIP(); auto *BB = llvm::BasicBlock::Create(C, "entry", F); auto &Builder = CGF.Builder; Builder.SetInsertPoint(BB); llvm::SmallVector Args; for (auto &A : F->args()) Args.push_back(&A); Builder.CreateCall(Invoke, Args); Builder.CreateRetVoid(); Builder.restoreIP(IP); return F; } /// Create an OpenCL kernel for an enqueued block. /// /// The type of the first argument (the block literal) is the struct type /// of the block literal instead of a pointer type. The first argument /// (block literal) is passed directly by value to the kernel. The kernel /// allocates the same type of struct on stack and stores the block literal /// to it and passes its pointer to the block invoke function. The kernel /// has "enqueued-block" function attribute and kernel argument metadata. llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel( CodeGenFunction &CGF, llvm::Function *Invoke, llvm::Value *BlockLiteral) const { auto &Builder = CGF.Builder; auto &C = CGF.getLLVMContext(); auto *BlockTy = BlockLiteral->getType()->getPointerElementType(); auto *InvokeFT = Invoke->getFunctionType(); llvm::SmallVector ArgTys; llvm::SmallVector AddressQuals; llvm::SmallVector AccessQuals; llvm::SmallVector ArgTypeNames; llvm::SmallVector ArgBaseTypeNames; llvm::SmallVector ArgTypeQuals; llvm::SmallVector ArgNames; ArgTys.push_back(BlockTy); ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal")); AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0))); ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal")); ArgTypeQuals.push_back(llvm::MDString::get(C, "")); AccessQuals.push_back(llvm::MDString::get(C, "none")); ArgNames.push_back(llvm::MDString::get(C, "block_literal")); for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) { ArgTys.push_back(InvokeFT->getParamType(I)); ArgTypeNames.push_back(llvm::MDString::get(C, "void*")); AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3))); AccessQuals.push_back(llvm::MDString::get(C, "none")); ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*")); ArgTypeQuals.push_back(llvm::MDString::get(C, "")); ArgNames.push_back( llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str())); } std::string Name = Invoke->getName().str() + "_kernel"; auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false); auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name, &CGF.CGM.getModule()); F->addFnAttr("enqueued-block"); auto IP = CGF.Builder.saveIP(); auto *BB = llvm::BasicBlock::Create(C, "entry", F); Builder.SetInsertPoint(BB); const auto BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlign(BlockTy); auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr); BlockPtr->setAlignment(BlockAlign); Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign); auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0)); llvm::SmallVector 
Args; Args.push_back(Cast); for (auto I = F->arg_begin() + 1, E = F->arg_end(); I != E; ++I) Args.push_back(I); Builder.CreateCall(Invoke, Args); Builder.CreateRetVoid(); Builder.restoreIP(IP); F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals)); F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals)); F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames)); F->setMetadata("kernel_arg_base_type", llvm::MDNode::get(C, ArgBaseTypeNames)); F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals)); if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata) F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames)); return F; } diff --git a/clang/test/CodeGen/aarch64-branch-protection-attr.c b/clang/test/CodeGen/aarch64-branch-protection-attr.c index ee761d6e8977..d694b619802d 100644 --- a/clang/test/CodeGen/aarch64-branch-protection-attr.c +++ b/clang/test/CodeGen/aarch64-branch-protection-attr.c @@ -1,81 +1,63 @@ // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -triple aarch64-unknown-unknown-eabi -emit-llvm -target-cpu generic -target-feature +v8.5a %s -o - \ -// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=NO-OVERRIDE -// RUN: %clang_cc1 -triple aarch64-unknown-unknown-eabi -emit-llvm -target-cpu generic -target-feature +v8.5a %s -o - \ -// RUN: -msign-return-address=non-leaf -msign-return-address-key=a_key -mbranch-target-enforce \ -// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=OVERRIDE - -void missing() {} -// NO-OVERRIDE: define void @missing() #[[#NONE:]] -// OVERRIDE: define void @missing() #[[#STD:]] +// RUN: | FileCheck %s --check-prefix=CHECK __attribute__ ((target("branch-protection=none"))) void none() {} -// NO-OVERRIDE: define void @none() #[[#NONE]] -// OVERRIDE: define void @none() #[[#NONE:]] +// CHECK: define void @none() #[[#NONE:]] __attribute__ ((target("branch-protection=standard"))) void std() {} -// NO-OVERRIDE: define void @std() #[[#STD:]] -// OVERRIDE: define void @std() #[[#STD]] +// CHECK: define void @std() #[[#STD:]] __attribute__ ((target("branch-protection=bti"))) void btionly() {} -// NO-OVERRIDE: define void @btionly() #[[#BTI:]] -// OVERRIDE: define void @btionly() #[[#BTI:]] +// CHECK: define void @btionly() #[[#BTI:]] __attribute__ ((target("branch-protection=pac-ret"))) void paconly() {} -// NO-OVERRIDE: define void @paconly() #[[#PAC:]] -// OVERRIDE: define void @paconly() #[[#PAC:]] +// CHECK: define void @paconly() #[[#PAC:]] __attribute__ ((target("branch-protection=pac-ret+bti"))) void pacbti0() {} -// NO-OVERRIDE: define void @pacbti0() #[[#PACBTI:]] -// OVERRIDE: define void @pacbti0() #[[#PACBTI:]] +// CHECK: define void @pacbti0() #[[#PACBTI:]] __attribute__ ((target("branch-protection=bti+pac-ret"))) void pacbti1() {} -// NO-OVERRIDE: define void @pacbti1() #[[#PACBTI]] -// OVERRIDE: define void @pacbti1() #[[#PACBTI]] +// CHECK: define void @pacbti1() #[[#PACBTI]] __attribute__ ((target("branch-protection=pac-ret+leaf"))) void leaf() {} -// NO-OVERRIDE: define void @leaf() #[[#PACLEAF:]] -// OVERRIDE: define void @leaf() #[[#PACLEAF:]] +// CHECK: define void @leaf() #[[#PACLEAF:]] __attribute__ ((target("branch-protection=pac-ret+b-key"))) void bkey() {} -// NO-OVERRIDE: define void @bkey() #[[#PACBKEY:]] -// OVERRIDE: define void @bkey() #[[#PACBKEY:]] +// CHECK: define void @bkey() #[[#PACBKEY:]] __attribute__ ((target("branch-protection=pac-ret+b-key+leaf"))) void bkeyleaf0() {} -// NO-OVERRIDE: define void @bkeyleaf0() 
#[[#PACBKEYLEAF:]] -// OVERRIDE: define void @bkeyleaf0() #[[#PACBKEYLEAF:]] +// CHECK: define void @bkeyleaf0() #[[#PACBKEYLEAF:]] __attribute__ ((target("branch-protection=pac-ret+leaf+b-key"))) void bkeyleaf1() {} -// NO-OVERRIDE: define void @bkeyleaf1() #[[#PACBKEYLEAF]] -// OVERRIDE: define void @bkeyleaf1() #[[#PACBKEYLEAF]] +// CHECK: define void @bkeyleaf1() #[[#PACBKEYLEAF]] __attribute__ ((target("branch-protection=pac-ret+leaf+bti"))) void btileaf() {} -// NO-OVERRIDE: define void @btileaf() #[[#BTIPACLEAF:]] -// OVERRIDE: define void @btileaf() #[[#BTIPACLEAF:]] +// CHECK: define void @btileaf() #[[#BTIPACLEAF:]] -// CHECK-DAG: attributes #[[#NONE]] +// CHECK-DAG: attributes #[[#NONE]] = { {{.*}} "branch-target-enforcement"="false" {{.*}} "sign-return-address"="none" -// CHECK-DAG: attributes #[[#STD]] = { {{.*}} "branch-target-enforcement" {{.*}} "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" +// CHECK-DAG: attributes #[[#STD]] = { {{.*}} "branch-target-enforcement"="true" {{.*}} "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" -// CHECK-DAG: attributes #[[#BTI]] = { {{.*}}"branch-target-enforcement" +// CHECK-DAG: attributes #[[#BTI]] = { {{.*}} "branch-target-enforcement"="true" {{.*}} "sign-return-address"="none" -// CHECK-DAG: attributes #[[#PAC]] = { {{.*}} "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" +// CHECK-DAG: attributes #[[#PAC]] = { {{.*}} "branch-target-enforcement"="false" {{.*}} "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" -// CHECK-DAG: attributes #[[#PACLEAF]] = { {{.*}} "sign-return-address"="all" "sign-return-address-key"="a_key" +// CHECK-DAG: attributes #[[#PACLEAF]] = { {{.*}} "branch-target-enforcement"="false" {{.*}}"sign-return-address"="all" "sign-return-address-key"="a_key" -// CHECK-DAG: attributes #[[#PACBKEY]] = { {{.*}} "sign-return-address"="non-leaf" "sign-return-address-key"="b_key" +// CHECK-DAG: attributes #[[#PACBKEY]] = { {{.*}}"branch-target-enforcement"="false" {{.*}} "sign-return-address"="non-leaf" "sign-return-address-key"="b_key" -// CHECK-DAG: attributes #[[#PACBKEYLEAF]] = { {{.*}} "sign-return-address"="all" "sign-return-address-key"="b_key" +// CHECK-DAG: attributes #[[#PACBKEYLEAF]] = { {{.*}} "branch-target-enforcement"="false" {{.*}}"sign-return-address"="all" "sign-return-address-key"="b_key" -// CHECK-DAG: attributes #[[#BTIPACLEAF]] = { {{.*}}"branch-target-enforcement" {{.*}} "sign-return-address"="all" "sign-return-address-key"="a_key" +// CHECK-DAG: attributes #[[#BTIPACLEAF]] = { {{.*}}"branch-target-enforcement"="true" {{.*}} "sign-return-address"="all" "sign-return-address-key"="a_key" diff --git a/clang/test/CodeGen/aarch64-sign-return-address.c b/clang/test/CodeGen/aarch64-sign-return-address.c index d062685e40c9..7dd7231e824a 100644 --- a/clang/test/CodeGen/aarch64-sign-return-address.c +++ b/clang/test/CodeGen/aarch64-sign-return-address.c @@ -1,27 +1,46 @@ -// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.3-a -S -emit-llvm -o - -msign-return-address=none %s | FileCheck %s --check-prefix=CHECK --check-prefix=NONE -// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.2-a -S -emit-llvm -o - -msign-return-address=all %s | FileCheck %s --check-prefix=CHECK --check-prefix=ALL --check-prefix=A-KEY -// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.3-a -S -emit-llvm -o - -msign-return-address=all %s | FileCheck %s --check-prefix=CHECK --check-prefix=ALL --check-prefix=A-KEY -// RUN: %clang -target 
aarch64-arm-none-eabi -march=armv8.3-a -S -emit-llvm -o - -msign-return-address=non-leaf %s | FileCheck %s --check-prefix=CHECK --check-prefix=PARTIAL --check-prefix=A-KEY -// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.3-a -S -emit-llvm -o - -msign-return-address=all %s | FileCheck %s --check-prefix=CHECK --check-prefix=ALL --check-prefix=A-KEY -// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.4-a -S -emit-llvm -o - -msign-return-address=all %s | FileCheck %s --check-prefix=CHECK --check-prefix=ALL --check-prefix=A-KEY -// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.3-a -S -emit-llvm -o - -mbranch-protection=pac-ret+b-key %s | FileCheck %s --check-prefix=CHECK --check-prefix=PARTIAL --check-prefix=B-KEY -// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.3-a -S -emit-llvm -o - -mbranch-protection=pac-ret+b-key+leaf %s | FileCheck %s --check-prefix=CHECK --check-prefix=ALL --check-prefix=B-KEY -// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.3-a -S -emit-llvm -o - -mbranch-protection=bti %s | FileCheck %s --check-prefix=CHECK --check-prefix=BTE +// RUN: %clang -target aarch64-arm-none-eabi -S -emit-llvm -o - -msign-return-address=none %s | FileCheck %s --check-prefix=CHECK --check-prefix=NONE +// RUN: %clang -target aarch64-arm-none-eabi -S -emit-llvm -o - -msign-return-address=all %s | FileCheck %s --check-prefix=CHECK --check-prefix=ALL +// RUN: %clang -target aarch64-arm-none-eabi -S -emit-llvm -o - -msign-return-address=non-leaf %s | FileCheck %s --check-prefix=CHECK --check-prefix=PART + +// RUN: %clang -target aarch64-arm-none-eabi -S -emit-llvm -o - -mbranch-protection=none %s | FileCheck %s --check-prefix=CHECK --check-prefix=NONE +// RUN: %clang -target aarch64-arm-none-eabi -S -emit-llvm -o - -mbranch-protection=pac-ret+leaf %s | FileCheck %s --check-prefix=CHECK --check-prefix=ALL +// RUN: %clang -target aarch64-arm-none-eabi -S -emit-llvm -o - -mbranch-protection=pac-ret+b-key %s | FileCheck %s --check-prefix=CHECK --check-prefix=B-KEY +// RUN: %clang -target aarch64-arm-none-eabi -S -emit-llvm -o - -mbranch-protection=bti %s | FileCheck %s --check-prefix=CHECK --check-prefix=BTE // REQUIRES: aarch64-registered-target -// CHECK: @foo() #[[ATTR:[0-9]*]] -// -// NONE-NOT: "sign-return-address"={{.*}} +// Check there are no branch protection function attributes + +// CHECK-LABEL: @foo() #[[#ATTR:]] + +// CHECK-NOT: attributes #[[#ATTR]] = { {{.*}} "sign-return-address" +// CHECK-NOT: attributes #[[#ATTR]] = { {{.*}} "sign-return-address-key" +// CHECK-NOT: attributes #[[#ATTR]] = { {{.*}} "branch-target-enforcement" -// PARTIAL: "sign-return-address"="non-leaf" +// Check module attributes -// ALL: "sign-return-address"="all" +// NONE: !{i32 1, !"branch-target-enforcement", i32 0} +// ALL: !{i32 1, !"branch-target-enforcement", i32 0} +// PART: !{i32 1, !"branch-target-enforcement", i32 0} +// BTE: !{i32 1, !"branch-target-enforcement", i32 1} +// B-KEY: !{i32 1, !"branch-target-enforcement", i32 0} -// BTE: "branch-target-enforcement" +// NONE: !{i32 1, !"sign-return-address", i32 0} +// ALL: !{i32 1, !"sign-return-address", i32 1} +// PART: !{i32 1, !"sign-return-address", i32 1} +// BTE: !{i32 1, !"sign-return-address", i32 0} +// B-KEY: !{i32 1, !"sign-return-address", i32 1} -// A-KEY: "sign-return-address-key"="a_key" +// NONE: !{i32 1, !"sign-return-address-all", i32 0} +// ALL: !{i32 1, !"sign-return-address-all", i32 1} +// PART: !{i32 1, !"sign-return-address-all", i32 0} +// BTE: !{i32 1, !"sign-return-address-all", i32 0} 
+// B-KEY: !{i32 1, !"sign-return-address-all", i32 0} -// B-KEY: "sign-return-address-key"="b_key" +// NONE: !{i32 1, !"sign-return-address-with-bkey", i32 0} +// ALL: !{i32 1, !"sign-return-address-with-bkey", i32 0} +// PART: !{i32 1, !"sign-return-address-with-bkey", i32 0} +// BTE: !{i32 1, !"sign-return-address-with-bkey", i32 0} +// B-KEY: !{i32 1, !"sign-return-address-with-bkey", i32 1} void foo() {} diff --git a/clang/test/CodeGenCXX/aarch64-sign-return-address-static-ctor.cpp b/clang/test/CodeGenCXX/aarch64-sign-return-address-static-ctor.cpp deleted file mode 100644 index 3971c22287f9..000000000000 --- a/clang/test/CodeGenCXX/aarch64-sign-return-address-static-ctor.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// RUN: %clang -target aarch64-arm-none-eabi -S -emit-llvm -o - -msign-return-address=none %s | \ -// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NONE -// RUN: %clang -target aarch64-arm-none-eabi -S -emit-llvm -o - -msign-return-address=non-leaf %s | \ -// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-PARTIAL --check-prefix=CHECK-A-KEY -// RUN: %clang -target aarch64-arm-none-eabi -S -emit-llvm -o - -msign-return-address=all %s | \ -// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALL --check-prefix=CHECK-A-KEY - -// RUN: %clang -target aarch64-arm-none-eabi -S -emit-llvm -o - -mbranch-protection=none %s | \ -// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NONE -// RUN: %clang -target aarch64-arm-none-eabi -S -emit-llvm -o - -mbranch-protection=standard %s | \ -// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-PARTIAL --check-prefix=CHECK-A-KEY --check-prefix=CHECK-BTE -// RUN: %clang -target aarch64-arm-none-eabi -S -emit-llvm -o - -mbranch-protection=pac-ret %s | \ -// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-PARTIAL --check-prefix=CHECK-A-KEY -// RUN: %clang -target aarch64-arm-none-eabi -S -emit-llvm -o - -mbranch-protection=pac-ret+leaf %s | \ -// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALL --check-prefix=CHECK-A-KEY -// RUN: %clang -target aarch64-arm-none-eabi -S -emit-llvm -o - -mbranch-protection=pac-ret+b-key %s | \ -// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-PARTIAL --check-prefix=CHECK-B-KEY -// RUN: %clang -target aarch64-arm-none-eabi -S -emit-llvm -o - -mbranch-protection=pac-ret+b-key+leaf %s | \ -// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALL --check-prefix=CHECK-B-KEY -// RUN: %clang -target aarch64-arm-none-eabi -S -emit-llvm -o - -mbranch-protection=bti %s | \ -// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BTE -// RUN: %clang -target aarch64-arm-none-eabi -S -emit-llvm -o - -mbranch-protection=pac-ret+b-key+leaf+bti %s | \ -// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALL --check-prefix=CHECK-B-KEY --check-prefix=BTE - -struct Foo { - Foo() {} - ~Foo() {} -}; - -Foo f; - -// CHECK: @llvm.global_ctors {{.*}}i32 65535, void ()* @[[CTOR_FN:.*]], i8* null - -// CHECK: @[[CTOR_FN]]() #[[ATTR:[0-9]*]] - -// CHECK-NONE-NOT: "sign-return-address"={{.*}} -// CHECK-PARTIAL: "sign-return-address"="non-leaf" -// CHECK-ALL: "sign-return-address"="all" -// CHECK-A-KEY: "sign-return-address-key"="a_key" -// CHECK-B-KEY: "sign-return-address-key"="b_key" -// CHECK-BTE: "branch-target-enforcement" diff --git a/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp b/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp index 1956014b738d..d3b5166585c3 100644 --- 
a/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp +++ b/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp @@ -1,135 +1,136 @@ //===-- AArch64BranchTargets.cpp -- Harden code using v8.5-A BTI extension -==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This pass inserts BTI instructions at the start of every function and basic // block which could be indirectly called. The hardware will (when enabled) // trap when an indirect branch or call instruction targets an instruction // which is not a valid BTI instruction. This is intended to guard against // control-flow hijacking attacks. Note that this does not do anything for RET // instructions, as they can be more precisely protected by return address // signing. // //===----------------------------------------------------------------------===// +#include "AArch64MachineFunctionInfo.h" #include "AArch64Subtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/Support/Debug.h" using namespace llvm; #define DEBUG_TYPE "aarch64-branch-targets" #define AARCH64_BRANCH_TARGETS_NAME "AArch64 Branch Targets" namespace { class AArch64BranchTargets : public MachineFunctionPass { public: static char ID; AArch64BranchTargets() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { return AARCH64_BRANCH_TARGETS_NAME; } private: void addBTI(MachineBasicBlock &MBB, bool CouldCall, bool CouldJump); }; } // end anonymous namespace char AArch64BranchTargets::ID = 0; INITIALIZE_PASS(AArch64BranchTargets, "aarch64-branch-targets", AARCH64_BRANCH_TARGETS_NAME, false, false) void AArch64BranchTargets::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } FunctionPass *llvm::createAArch64BranchTargetsPass() { return new AArch64BranchTargets(); } bool AArch64BranchTargets::runOnMachineFunction(MachineFunction &MF) { - const Function &F = MF.getFunction(); - if (!F.hasFnAttribute("branch-target-enforcement")) + if (!MF.getInfo()->branchTargetEnforcement()) return false; LLVM_DEBUG( dbgs() << "********** AArch64 Branch Targets **********\n" << "********** Function: " << MF.getName() << '\n'); + const Function &F = MF.getFunction(); // LLVM does not consider basic blocks which are the targets of jump tables // to be address-taken (the address can't escape anywhere else), but they are // used for indirect branches, so need BTI instructions. SmallPtrSet JumpTableTargets; if (auto *JTI = MF.getJumpTableInfo()) for (auto &JTE : JTI->getJumpTables()) for (auto *MBB : JTE.MBBs) JumpTableTargets.insert(MBB); bool MadeChange = false; for (MachineBasicBlock &MBB : MF) { bool CouldCall = false, CouldJump = false; // If the function is address-taken or externally-visible, it could be // indirectly called. PLT entries and tail-calls use BR, but when they are // are in guarded pages should all use x16 or x17 to hold the called // address, so we don't need to set CouldJump here. 
BR instructions in // non-guarded pages (which might be non-BTI-aware code) are allowed to // branch to a "BTI c" using any register. if (&MBB == &*MF.begin() && (F.hasAddressTaken() || !F.hasLocalLinkage())) CouldCall = true; // If the block itself is address-taken, it could be indirectly branched // to, but not called. if (MBB.hasAddressTaken() || JumpTableTargets.count(&MBB)) CouldJump = true; if (CouldCall || CouldJump) { addBTI(MBB, CouldCall, CouldJump); MadeChange = true; } } return MadeChange; } void AArch64BranchTargets::addBTI(MachineBasicBlock &MBB, bool CouldCall, bool CouldJump) { LLVM_DEBUG(dbgs() << "Adding BTI " << (CouldJump ? "j" : "") << (CouldCall ? "c" : "") << " to " << MBB.getName() << "\n"); const AArch64InstrInfo *TII = static_cast( MBB.getParent()->getSubtarget().getInstrInfo()); unsigned HintNum = 32; if (CouldCall) HintNum |= 2; if (CouldJump) HintNum |= 4; assert(HintNum != 32 && "No target kinds!"); auto MBBI = MBB.begin(); // Skip the meta instuctions, those will be removed anyway. for (; MBBI != MBB.end() && MBBI->isMetaInstruction(); ++MBBI) ; // SCTLR_EL1.BT[01] is set to 0 by default which means // PACI[AB]SP are implicitly BTI C so no BTI C instruction is needed there. if (MBBI != MBB.end() && HintNum == 34 && (MBBI->getOpcode() == AArch64::PACIASP || MBBI->getOpcode() == AArch64::PACIBSP)) return; BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), TII->get(AArch64::HINT)) .addImm(HintNum); } diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 625415e93d9c..7f4498da317c 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1,3259 +1,3225 @@ //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file contains the AArch64 implementation of TargetFrameLowering class. // // On AArch64, stack frames are structured as follows: // // The stack grows downward. // // All of the individual frame areas on the frame below are optional, i.e. it's // possible to create a function so that the particular area isn't present // in the frame. // // At function entry, the "frame" looks as follows: // // | | Higher address // |-----------------------------------| // | | // | arguments passed on the stack | // | | // |-----------------------------------| <- sp // | | Lower address // // // After the prologue has run, the frame has the following general structure. // Note that this doesn't depict the case where a red-zone is used. Also, // technically the last frame area (VLAs) doesn't get created until in the // main function body, after the prologue is run. However, it's depicted here // for completeness. // // | | Higher address // |-----------------------------------| // | | // | arguments passed on the stack | // | | // |-----------------------------------| // | | // | (Win64 only) varargs from reg | // | | // |-----------------------------------| // | | // | callee-saved gpr registers | <--. // | | | On Darwin platforms these // |- - - - - - - - - - - - - - - - - -| | callee saves are swapped, // | | | (frame record first) // | prev_fp, prev_lr | <--' // | (a.k.a. 
"frame record") | // |-----------------------------------| <- fp(=x29) // | | // | callee-saved fp/simd/SVE regs | // | | // |-----------------------------------| // | | // | SVE stack objects | // | | // |-----------------------------------| // |.empty.space.to.make.part.below....| // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at // |.the.standard.16-byte.alignment....| compile time; if present) // |-----------------------------------| // | | // | local variables of fixed size | // | including spill slots | // |-----------------------------------| <- bp(not defined by ABI, // |.variable-sized.local.variables....| LLVM chooses X19) // |.(VLAs)............................| (size of this area is unknown at // |...................................| compile time) // |-----------------------------------| <- sp // | | Lower address // // // To access the data in a frame, at-compile time, a constant offset must be // computable from one of the pointers (fp, bp, sp) to access it. The size // of the areas with a dotted background cannot be computed at compile-time // if they are present, making it required to have all three of fp, bp and // sp to be set up to be able to access all contents in the frame areas, // assuming all of the frame areas are non-empty. // // For most functions, some of the frame areas are empty. For those functions, // it may not be necessary to set up fp or bp: // * A base pointer is definitely needed when there are both VLAs and local // variables with more-than-default alignment requirements. // * A frame pointer is definitely needed when there are local variables with // more-than-default alignment requirements. // // For Darwin platforms the frame-record (fp, lr) is stored at the top of the // callee-saved area, since the unwind encoding does not allow for encoding // this dynamically and existing tools depend on this layout. For other // platforms, the frame-record is stored at the bottom of the (gpr) callee-saved // area to allow SVE stack objects (allocated directly below the callee-saves, // if available) to be accessed directly from the framepointer. // The SVE spill/fill instructions have VL-scaled addressing modes such // as: // ldr z8, [fp, #-7 mul vl] // For SVE the size of the vector length (VL) is not known at compile-time, so // '#-7 mul vl' is an offset that can only be evaluated at runtime. With this // layout, we don't need to add an unscaled offset to the framepointer before // accessing the SVE object in the frame. // // In some cases when a base pointer is not strictly needed, it is generated // anyway when offsets from the frame pointer to access local variables become // so large that the offset can't be encoded in the immediate fields of loads // or stores. // // FIXME: also explain the redzone concept. // FIXME: also explain the concept of reserved call frames. 
// //===----------------------------------------------------------------------===// #include "AArch64FrameLowering.h" #include "AArch64InstrInfo.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64RegisterInfo.h" #include "AArch64StackOffset.h" #include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCDwarf.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include #include #include #include using namespace llvm; #define DEBUG_TYPE "frame-info" static cl::opt EnableRedZone("aarch64-redzone", cl::desc("enable use of redzone on AArch64"), cl::init(false), cl::Hidden); static cl::opt ReverseCSRRestoreSeq("reverse-csr-restore-seq", cl::desc("reverse the CSR restore sequence"), cl::init(false), cl::Hidden); static cl::opt StackTaggingMergeSetTag( "stack-tagging-merge-settag", cl::desc("merge settag instruction in function epilog"), cl::init(true), cl::Hidden); STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); /// Returns the argument pop size. static uint64_t getArgumentPopSize(MachineFunction &MF, MachineBasicBlock &MBB) { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); bool IsTailCallReturn = false; if (MBB.end() != MBBI) { unsigned RetOpcode = MBBI->getOpcode(); IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri || RetOpcode == AArch64::TCRETURNriBTI; } AArch64FunctionInfo *AFI = MF.getInfo(); uint64_t ArgumentPopSize = 0; if (IsTailCallReturn) { MachineOperand &StackAdjust = MBBI->getOperand(1); // For a tail-call in a callee-pops-arguments environment, some or all of // the stack may actually be in use for the call's arguments, this is // calculated during LowerCall and consumed here... ArgumentPopSize = StackAdjust.getImm(); } else { // ... otherwise the amount to pop is *all* of the argument space, // conveniently stored in the MachineFunctionInfo by // LowerFormalArguments. This will, of course, be zero for the C calling // convention. ArgumentPopSize = AFI->getArgumentStackToRestore(); } return ArgumentPopSize; } /// This is the biggest offset to the stack pointer we can encode in aarch64 /// instructions (without using a separate calculation and a temp register). 
/// Note that the exception here are vector stores/loads which cannot encode any /// displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()). static const unsigned DefaultSafeSPDisplacement = 255; /// Look at each instruction that references stack frames and return the stack /// size limit beyond which some of these instructions will require a scratch /// register during their expansion later. static unsigned estimateRSStackSizeLimit(MachineFunction &MF) { // FIXME: For now, just conservatively guestimate based on unscaled indexing // range. We'll end up allocating an unnecessary spill slot a lot, but // realistically that's not a big deal at this stage of the game. for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { if (MI.isDebugInstr() || MI.isPseudo() || MI.getOpcode() == AArch64::ADDXri || MI.getOpcode() == AArch64::ADDSXri) continue; for (const MachineOperand &MO : MI.operands()) { if (!MO.isFI()) continue; StackOffset Offset; if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) == AArch64FrameOffsetCannotUpdate) return 0; } } } return DefaultSafeSPDisplacement; } TargetStackID::Value AArch64FrameLowering::getStackIDForScalableVectors() const { return TargetStackID::SVEVector; } /// Returns the size of the fixed object area (allocated next to sp on entry) /// On Win64 this may include a var args area and an UnwindHelp object for EH. static unsigned getFixedObjectSize(const MachineFunction &MF, const AArch64FunctionInfo *AFI, bool IsWin64, bool IsFunclet) { if (!IsWin64 || IsFunclet) { // Only Win64 uses fixed objects, and then only for the function (not // funclets) return 0; } else { // Var args are stored here in the primary function. const unsigned VarArgsArea = AFI->getVarArgsGPRSize(); // To support EH funclets we allocate an UnwindHelp object const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0); return alignTo(VarArgsArea + UnwindHelpObject, 16); } } /// Returns the size of the entire SVE stackframe (calleesaves + spills). static StackOffset getSVEStackSize(const MachineFunction &MF) { const AArch64FunctionInfo *AFI = MF.getInfo(); return {(int64_t)AFI->getStackSizeSVE(), MVT::nxv1i8}; } bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { if (!EnableRedZone) return false; // Don't use the red zone if the function explicitly asks us not to. // This is typically used for kernel code. if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone)) return false; const MachineFrameInfo &MFI = MF.getFrameInfo(); const AArch64FunctionInfo *AFI = MF.getInfo(); uint64_t NumBytes = AFI->getLocalStackSize(); return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128 || getSVEStackSize(MF)); } /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); // Win64 EH requires a frame pointer if funclets are present, as the locals // are accessed off the frame pointer in both the parent function and the // funclets. if (MF.hasEHFunclets()) return true; // Retain behavior of always omitting the FP for leaf functions when possible. 
if (MF.getTarget().Options.DisableFramePointerElim(MF)) return true; if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || MFI.hasStackMap() || MFI.hasPatchPoint() || RegInfo->needsStackRealignment(MF)) return true; // With large callframes around we may need to use FP to access the scavenging // emergency spillslot. // // Unfortunately some calls to hasFP() like machine verifier -> // getReservedReg() -> hasFP in the middle of global isel are too early // to know the max call frame size. Hopefully conservatively returning "true" // in those cases is fine. // DefaultSafeSPDisplacement is fine as we only emergency spill GP regs. if (!MFI.isMaxCallFrameSizeComputed() || MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement) return true; return false; } /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is /// not required, we reserve argument space for call sites in the function /// immediately on entry to the current function. This eliminates the need for /// add/sub sp brackets around call sites. Returns true if the call frame is /// included as part of the stack frame. bool AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { return !MF.getFrameInfo().hasVarSizedObjects(); } MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const AArch64InstrInfo *TII = static_cast(MF.getSubtarget().getInstrInfo()); DebugLoc DL = I->getDebugLoc(); unsigned Opc = I->getOpcode(); bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode(); uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0; if (!hasReservedCallFrame(MF)) { int64_t Amount = I->getOperand(0).getImm(); Amount = alignTo(Amount, getStackAlign()); if (!IsDestroy) Amount = -Amount; // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it // doesn't have to pop anything), then the first operand will be zero too so // this adjustment is a no-op. if (CalleePopAmount == 0) { // FIXME: in-function stack adjustment for calls is limited to 24-bits // because there's no guaranteed temporary register available. // // ADD/SUB (immediate) has only LSL #0 and LSL #12 available. // 1) For offset <= 12-bit, we use LSL #0 // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses // LSL #0, and the other uses LSL #12. // // Most call frames will be allocated at the start of a function so // this is OK, but it is a limitation that needs dealing with. assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large"); emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, {Amount, MVT::i8}, TII); } } else if (CalleePopAmount != 0) { // If the calling convention demands that the callee pops arguments from the // stack, we want to add it back if we have a reserved call frame. 
assert(CalleePopAmount < 0xffffff && "call frame too large"); emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, {-(int64_t)CalleePopAmount, MVT::i8}, TII); } return MBB.erase(I); } -static bool ShouldSignReturnAddress(MachineFunction &MF) { - // The function should be signed in the following situations: - // - sign-return-address=all - // - sign-return-address=non-leaf and the functions spills the LR - - const Function &F = MF.getFunction(); - if (!F.hasFnAttribute("sign-return-address")) - return false; - - StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString(); - if (Scope.equals("none")) - return false; - - if (Scope.equals("all")) - return true; - - assert(Scope.equals("non-leaf") && "Expected all, none or non-leaf"); - - for (const auto &Info : MF.getFrameInfo().getCalleeSavedInfo()) - if (Info.getReg() == AArch64::LR) - return true; - - return false; -} - // Convenience function to create a DWARF expression for // Expr + NumBytes + NumVGScaledBytes * AArch64::VG static void appendVGScaledOffsetExpr(SmallVectorImpl &Expr, int NumBytes, int NumVGScaledBytes, unsigned VG, llvm::raw_string_ostream &Comment) { uint8_t buffer[16]; if (NumBytes) { Expr.push_back(dwarf::DW_OP_consts); Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer)); Expr.push_back((uint8_t)dwarf::DW_OP_plus); Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes); } if (NumVGScaledBytes) { Expr.push_back((uint8_t)dwarf::DW_OP_consts); Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer)); Expr.push_back((uint8_t)dwarf::DW_OP_bregx); Expr.append(buffer, buffer + encodeULEB128(VG, buffer)); Expr.push_back(0); Expr.push_back((uint8_t)dwarf::DW_OP_mul); Expr.push_back((uint8_t)dwarf::DW_OP_plus); Comment << (NumVGScaledBytes < 0 ? " - " : " + ") << std::abs(NumVGScaledBytes) << " * VG"; } } // Creates an MCCFIInstruction: // { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr } MCCFIInstruction AArch64FrameLowering::createDefCFAExpressionFromSP( const TargetRegisterInfo &TRI, const StackOffset &OffsetFromSP) const { int64_t NumBytes, NumVGScaledBytes; OffsetFromSP.getForDwarfOffset(NumBytes, NumVGScaledBytes); std::string CommentBuffer = "sp"; llvm::raw_string_ostream Comment(CommentBuffer); // Build up the expression (SP + NumBytes + NumVGScaledBytes * AArch64::VG) SmallString<64> Expr; Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + /*SP*/ 31)); Expr.push_back(0); appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes, TRI.getDwarfRegNum(AArch64::VG, true), Comment); // Wrap this into DW_CFA_def_cfa. SmallString<64> DefCfaExpr; DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression); uint8_t buffer[16]; DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer)); DefCfaExpr.append(Expr.str()); return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), Comment.str()); } MCCFIInstruction AArch64FrameLowering::createCfaOffset( const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &OffsetFromDefCFA) const { int64_t NumBytes, NumVGScaledBytes; OffsetFromDefCFA.getForDwarfOffset(NumBytes, NumVGScaledBytes); unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true); // Non-scalable offsets can use DW_CFA_offset directly. 
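// For scalable (SVE) offsets, the DW_CFA_expression built below encodes
// "CFA + NumBytes + NumVGScaledBytes * VG"; in the emitted assembly this
// shows up as a comment along the lines of "z8 @ cfa - 16 - 8 * VG"
// (numbers illustrative only), with VG being the number of 64-bit granules
// in one scalable vector.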
if (!NumVGScaledBytes) return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes); std::string CommentBuffer; llvm::raw_string_ostream Comment(CommentBuffer); Comment << printReg(Reg, &TRI) << " @ cfa"; // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG) SmallString<64> OffsetExpr; appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes, TRI.getDwarfRegNum(AArch64::VG, true), Comment); // Wrap this into DW_CFA_expression SmallString<64> CfaExpr; CfaExpr.push_back(dwarf::DW_CFA_expression); uint8_t buffer[16]; CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer)); CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer)); CfaExpr.append(OffsetExpr.str()); return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), Comment.str()); } void AArch64FrameLowering::emitCalleeSavedFrameMoves( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetSubtargetInfo &STI = MF.getSubtarget(); const TargetRegisterInfo *TRI = STI.getRegisterInfo(); const TargetInstrInfo *TII = STI.getInstrInfo(); DebugLoc DL = MBB.findDebugLoc(MBBI); // Add callee saved registers to move list. const std::vector &CSI = MFI.getCalleeSavedInfo(); if (CSI.empty()) return; for (const auto &Info : CSI) { unsigned Reg = Info.getReg(); // Not all unwinders may know about SVE registers, so assume the lowest // common demoninator. unsigned NewReg; if (static_cast(TRI)->regNeedsCFI(Reg, NewReg)) Reg = NewReg; else continue; StackOffset Offset; if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::SVEVector) { AArch64FunctionInfo *AFI = MF.getInfo(); Offset = StackOffset(MFI.getObjectOffset(Info.getFrameIdx()), MVT::nxv1i8) - StackOffset(AFI->getCalleeSavedStackSize(MFI), MVT::i8); } else { Offset = {MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea(), MVT::i8}; } unsigned CFIIndex = MF.addFrameInst(createCfaOffset(*TRI, Reg, Offset)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } } // Find a scratch register that we can use at the start of the prologue to // re-align the stack pointer. We avoid using callee-save registers since they // may appear to be free when this is called from canUseAsPrologue (during // shrink wrapping), but then no longer be free when this is called from // emitPrologue. // // FIXME: This is a bit conservative, since in the above case we could use one // of the callee-save registers as a scratch temp to re-align the stack pointer, // but we would then have to make sure that we were in fact saving at least one // callee-save register in the prologue, which is additional complexity that // doesn't seem worth the benefit. static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) { MachineFunction *MF = MBB->getParent(); // If MBB is an entry block, use X9 as the scratch register if (&MF->front() == MBB) return AArch64::X9; const AArch64Subtarget &Subtarget = MF->getSubtarget(); const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo(); LivePhysRegs LiveRegs(TRI); LiveRegs.addLiveIns(*MBB); // Mark callee saved registers as used so we will not choose them. const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs(); for (unsigned i = 0; CSRegs[i]; ++i) LiveRegs.addReg(CSRegs[i]); // Prefer X9 since it was historically used for the prologue scratch reg. 
const MachineRegisterInfo &MRI = MF->getRegInfo(); if (LiveRegs.available(MRI, AArch64::X9)) return AArch64::X9; for (unsigned Reg : AArch64::GPR64RegClass) { if (LiveRegs.available(MRI, Reg)) return Reg; } return AArch64::NoRegister; } bool AArch64FrameLowering::canUseAsPrologue( const MachineBasicBlock &MBB) const { const MachineFunction *MF = MBB.getParent(); MachineBasicBlock *TmpMBB = const_cast(&MBB); const AArch64Subtarget &Subtarget = MF->getSubtarget(); const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); // Don't need a scratch register if we're not going to re-align the stack. if (!RegInfo->needsStackRealignment(*MF)) return true; // Otherwise, we can use any block as long as it has a scratch register // available. return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister; } static bool windowsRequiresStackProbe(MachineFunction &MF, uint64_t StackSizeInBytes) { const AArch64Subtarget &Subtarget = MF.getSubtarget(); if (!Subtarget.isTargetWindows()) return false; const Function &F = MF.getFunction(); // TODO: When implementing stack protectors, take that into account // for the probe threshold. unsigned StackProbeSize = 4096; if (F.hasFnAttribute("stack-probe-size")) F.getFnAttribute("stack-probe-size") .getValueAsString() .getAsInteger(0, StackProbeSize); return (StackSizeInBytes >= StackProbeSize) && !F.hasFnAttribute("no-stack-arg-probe"); } bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( MachineFunction &MF, uint64_t StackBumpBytes) const { AArch64FunctionInfo *AFI = MF.getInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); const AArch64Subtarget &Subtarget = MF.getSubtarget(); const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); if (AFI->getLocalStackSize() == 0) return false; // 512 is the maximum immediate for stp/ldp that will be used for // callee-save save/restores if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes)) return false; if (MFI.hasVarSizedObjects()) return false; if (RegInfo->needsStackRealignment(MF)) return false; // This isn't strictly necessary, but it simplifies things a bit since the // current RedZone handling code assumes the SP is adjusted by the // callee-save save/restore code. if (canUseRedZone(MF)) return false; // When there is an SVE area on the stack, always allocate the // callee-saves and spills/locals separately. if (getSVEStackSize(MF)) return false; return true; } bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue( MachineBasicBlock &MBB, unsigned StackBumpBytes) const { if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes)) return false; if (MBB.empty()) return true; // Disable combined SP bump if the last instruction is an MTE tag store. It // is almost always better to merge SP adjustment into those instructions. MachineBasicBlock::iterator LastI = MBB.getFirstTerminator(); MachineBasicBlock::iterator Begin = MBB.begin(); while (LastI != Begin) { --LastI; if (LastI->isTransient()) continue; if (!LastI->getFlag(MachineInstr::FrameDestroy)) break; } switch (LastI->getOpcode()) { case AArch64::STGloop: case AArch64::STZGloop: case AArch64::STGOffset: case AArch64::STZGOffset: case AArch64::ST2GOffset: case AArch64::STZ2GOffset: return false; default: return true; } llvm_unreachable("unreachable"); } // Given a load or a store instruction, generate an appropriate unwinding SEH // code on Windows. 
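// Illustrative example (registers and offsets are not taken from real
// output): a prologue store such as
//   stp x29, x30, [sp, #-16]!
// is followed by a SEH_SaveFPLR_X pseudo, later printed as a
// .seh_save_fplr_x unwind directive, while a plain
//   str x19, [sp, #8]
// gets a SEH_SaveReg pseudo (.seh_save_reg). The switch below derives the
// SEH opcode and immediate from the load/store opcode and its operands.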
static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI, const TargetInstrInfo &TII, MachineInstr::MIFlag Flag) { unsigned Opc = MBBI->getOpcode(); MachineBasicBlock *MBB = MBBI->getParent(); MachineFunction &MF = *MBB->getParent(); DebugLoc DL = MBBI->getDebugLoc(); unsigned ImmIdx = MBBI->getNumOperands() - 1; int Imm = MBBI->getOperand(ImmIdx).getImm(); MachineInstrBuilder MIB; const AArch64Subtarget &Subtarget = MF.getSubtarget(); const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); switch (Opc) { default: llvm_unreachable("No SEH Opcode for this instruction"); case AArch64::LDPDpost: Imm = -Imm; LLVM_FALLTHROUGH; case AArch64::STPDpre: { unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg()); MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X)) .addImm(Reg0) .addImm(Reg1) .addImm(Imm * 8) .setMIFlag(Flag); break; } case AArch64::LDPXpost: Imm = -Imm; LLVM_FALLTHROUGH; case AArch64::STPXpre: { Register Reg0 = MBBI->getOperand(1).getReg(); Register Reg1 = MBBI->getOperand(2).getReg(); if (Reg0 == AArch64::FP && Reg1 == AArch64::LR) MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X)) .addImm(Imm * 8) .setMIFlag(Flag); else MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X)) .addImm(RegInfo->getSEHRegNum(Reg0)) .addImm(RegInfo->getSEHRegNum(Reg1)) .addImm(Imm * 8) .setMIFlag(Flag); break; } case AArch64::LDRDpost: Imm = -Imm; LLVM_FALLTHROUGH; case AArch64::STRDpre: { unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X)) .addImm(Reg) .addImm(Imm) .setMIFlag(Flag); break; } case AArch64::LDRXpost: Imm = -Imm; LLVM_FALLTHROUGH; case AArch64::STRXpre: { unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X)) .addImm(Reg) .addImm(Imm) .setMIFlag(Flag); break; } case AArch64::STPDi: case AArch64::LDPDi: { unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP)) .addImm(Reg0) .addImm(Reg1) .addImm(Imm * 8) .setMIFlag(Flag); break; } case AArch64::STPXi: case AArch64::LDPXi: { Register Reg0 = MBBI->getOperand(0).getReg(); Register Reg1 = MBBI->getOperand(1).getReg(); if (Reg0 == AArch64::FP && Reg1 == AArch64::LR) MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR)) .addImm(Imm * 8) .setMIFlag(Flag); else MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP)) .addImm(RegInfo->getSEHRegNum(Reg0)) .addImm(RegInfo->getSEHRegNum(Reg1)) .addImm(Imm * 8) .setMIFlag(Flag); break; } case AArch64::STRXui: case AArch64::LDRXui: { int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg)) .addImm(Reg) .addImm(Imm * 8) .setMIFlag(Flag); break; } case AArch64::STRDui: case AArch64::LDRDui: { unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg)) .addImm(Reg) .addImm(Imm * 8) .setMIFlag(Flag); break; } } auto I = MBB->insertAfter(MBBI, MIB); return I; } // Fix up the SEH opcode associated with the save/restore instruction. 
static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI, unsigned LocalStackSize) { MachineOperand *ImmOpnd = nullptr; unsigned ImmIdx = MBBI->getNumOperands() - 1; switch (MBBI->getOpcode()) { default: llvm_unreachable("Fix the offset in the SEH instruction"); case AArch64::SEH_SaveFPLR: case AArch64::SEH_SaveRegP: case AArch64::SEH_SaveReg: case AArch64::SEH_SaveFRegP: case AArch64::SEH_SaveFReg: ImmOpnd = &MBBI->getOperand(ImmIdx); break; } if (ImmOpnd) ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize); } // Convert callee-save register save/restore instruction to do stack pointer // decrement/increment to allocate/deallocate the callee-save stack area by // converting store/load to use pre/post increment version. static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc, bool NeedsWinCFI, bool *HasWinCFI, bool InProlog = true) { // Ignore instructions that do not operate on SP, i.e. shadow call stack // instructions and associated CFI instruction. while (MBBI->getOpcode() == AArch64::STRXpost || MBBI->getOpcode() == AArch64::LDRXpre || MBBI->getOpcode() == AArch64::CFI_INSTRUCTION) { if (MBBI->getOpcode() != AArch64::CFI_INSTRUCTION) assert(MBBI->getOperand(0).getReg() != AArch64::SP); ++MBBI; } unsigned NewOpc; int Scale = 1; switch (MBBI->getOpcode()) { default: llvm_unreachable("Unexpected callee-save save/restore opcode!"); case AArch64::STPXi: NewOpc = AArch64::STPXpre; Scale = 8; break; case AArch64::STPDi: NewOpc = AArch64::STPDpre; Scale = 8; break; case AArch64::STPQi: NewOpc = AArch64::STPQpre; Scale = 16; break; case AArch64::STRXui: NewOpc = AArch64::STRXpre; break; case AArch64::STRDui: NewOpc = AArch64::STRDpre; break; case AArch64::STRQui: NewOpc = AArch64::STRQpre; break; case AArch64::LDPXi: NewOpc = AArch64::LDPXpost; Scale = 8; break; case AArch64::LDPDi: NewOpc = AArch64::LDPDpost; Scale = 8; break; case AArch64::LDPQi: NewOpc = AArch64::LDPQpost; Scale = 16; break; case AArch64::LDRXui: NewOpc = AArch64::LDRXpost; break; case AArch64::LDRDui: NewOpc = AArch64::LDRDpost; break; case AArch64::LDRQui: NewOpc = AArch64::LDRQpost; break; } // Get rid of the SEH code associated with the old instruction. if (NeedsWinCFI) { auto SEH = std::next(MBBI); if (AArch64InstrInfo::isSEHInstruction(*SEH)) SEH->eraseFromParent(); } MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); MIB.addReg(AArch64::SP, RegState::Define); // Copy all operands other than the immediate offset. unsigned OpndIdx = 0; for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd; ++OpndIdx) MIB.add(MBBI->getOperand(OpndIdx)); assert(MBBI->getOperand(OpndIdx).getImm() == 0 && "Unexpected immediate offset in first/last callee-save save/restore " "instruction!"); assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP && "Unexpected base register in callee-save save/restore instruction!"); assert(CSStackSizeInc % Scale == 0); MIB.addImm(CSStackSizeInc / Scale); MIB.setMIFlags(MBBI->getFlags()); MIB.setMemRefs(MBBI->memoperands()); // Generate a new SEH code that corresponds to the new instruction. if (NeedsWinCFI) { *HasWinCFI = true; InsertSEH(*MIB, *TII, InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy); } return std::prev(MBB.erase(MBBI)); } // Fixup callee-save register save/restore instructions to take into account // combined SP bump by adding the local stack size to the stack offsets. 
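// Illustrative example: with a combined SP bump, the prologue performs a
// single allocation such as
//   sub sp, sp, #96            // 32 bytes of callee saves + 64 bytes locals
// so a callee save originally laid out as  stp x20, x19, [sp, #16]  must be
// rewritten to  stp x20, x19, [sp, #80]; the helper below bumps the scaled
// immediate by LocalStackSize / Scale and fixes up any matching SEH code.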
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, uint64_t LocalStackSize, bool NeedsWinCFI, bool *HasWinCFI) { if (AArch64InstrInfo::isSEHInstruction(MI)) return; unsigned Opc = MI.getOpcode(); // Ignore instructions that do not operate on SP, i.e. shadow call stack // instructions and associated CFI instruction. if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre || Opc == AArch64::CFI_INSTRUCTION) { if (Opc != AArch64::CFI_INSTRUCTION) assert(MI.getOperand(0).getReg() != AArch64::SP); return; } unsigned Scale; switch (Opc) { case AArch64::STPXi: case AArch64::STRXui: case AArch64::STPDi: case AArch64::STRDui: case AArch64::LDPXi: case AArch64::LDRXui: case AArch64::LDPDi: case AArch64::LDRDui: Scale = 8; break; case AArch64::STPQi: case AArch64::STRQui: case AArch64::LDPQi: case AArch64::LDRQui: Scale = 16; break; default: llvm_unreachable("Unexpected callee-save save/restore opcode!"); } unsigned OffsetIdx = MI.getNumExplicitOperands() - 1; assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP && "Unexpected base register in callee-save save/restore instruction!"); // Last operand is immediate offset that needs fixing. MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx); // All generated opcodes have scaled offsets. assert(LocalStackSize % Scale == 0); OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale); if (NeedsWinCFI) { *HasWinCFI = true; auto MBBI = std::next(MachineBasicBlock::iterator(MI)); assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction"); assert(AArch64InstrInfo::isSEHInstruction(*MBBI) && "Expecting a SEH instruction"); fixupSEHOpcode(MBBI, LocalStackSize); } } static void adaptForLdStOpt(MachineBasicBlock &MBB, MachineBasicBlock::iterator FirstSPPopI, MachineBasicBlock::iterator LastPopI) { // Sometimes (when we restore in the same order as we save), we can end up // with code like this: // // ldp x26, x25, [sp] // ldp x24, x23, [sp, #16] // ldp x22, x21, [sp, #32] // ldp x20, x19, [sp, #48] // add sp, sp, #64 // // In this case, it is always better to put the first ldp at the end, so // that the load-store optimizer can run and merge the ldp and the add into // a post-index ldp. // If we managed to grab the first pop instruction, move it to the end. if (ReverseCSRRestoreSeq) MBB.splice(FirstSPPopI, &MBB, LastPopI); // We should end up with something like this now: // // ldp x24, x23, [sp, #16] // ldp x22, x21, [sp, #32] // ldp x20, x19, [sp, #48] // ldp x26, x25, [sp] // add sp, sp, #64 // // and the load-store optimizer can merge the last two instructions into: // // ldp x26, x25, [sp], #64 // } -static bool ShouldSignWithAKey(MachineFunction &MF) { - const Function &F = MF.getFunction(); - if (!F.hasFnAttribute("sign-return-address-key")) - return true; - - const StringRef Key = - F.getFnAttribute("sign-return-address-key").getValueAsString(); - assert(Key.equals_lower("a_key") || Key.equals_lower("b_key")); - return Key.equals_lower("a_key"); -} - static bool needsWinCFI(const MachineFunction &MF) { const Function &F = MF.getFunction(); return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && F.needsUnwindTableEntry(); } static bool isTargetWindows(const MachineFunction &MF) { return MF.getSubtarget().isTargetWindows(); } // Convenience function to determine whether I is an SVE callee save. 
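// Illustrative example: SVE callee saves are spilled and filled with
// VL-scaled addressing in the prologue/epilogue, e.g.
//   str z8, [sp, #7, mul vl]
//   str p4, [sp, #3, mul vl]
// and are recognized here purely by opcode plus the FrameSetup/FrameDestroy
// flag (the immediates above are examples only).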
static bool IsSVECalleeSave(MachineBasicBlock::iterator I) { switch (I->getOpcode()) { default: return false; case AArch64::STR_ZXI: case AArch64::STR_PXI: case AArch64::LDR_ZXI: case AArch64::LDR_PXI: return I->getFlag(MachineInstr::FrameSetup) || I->getFlag(MachineInstr::FrameDestroy); } } void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); const MachineFrameInfo &MFI = MF.getFrameInfo(); const Function &F = MF.getFunction(); const AArch64Subtarget &Subtarget = MF.getSubtarget(); const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); AArch64FunctionInfo *AFI = MF.getInfo(); bool needsFrameMoves = MF.needsFrameMoves() && !MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); bool HasFP = hasFP(MF); bool NeedsWinCFI = needsWinCFI(MF); bool HasWinCFI = false; auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); }); bool IsFunclet = MBB.isEHFuncletEntry(); // At this point, we're going to decide whether or not the function uses a // redzone. In most cases, the function doesn't have a redzone so let's // assume that's false and set it to true in the case that there's a redzone. AFI->setHasRedZone(false); // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. DebugLoc DL; - if (ShouldSignReturnAddress(MF)) { - if (ShouldSignWithAKey(MF)) - BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP)) - .setMIFlag(MachineInstr::FrameSetup); - else { + const auto &MFnI = *MF.getInfo(); + if (MFnI.shouldSignReturnAddress()) { + if (MFnI.shouldSignWithBKey()) { BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY)) .setMIFlag(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP)) .setMIFlag(MachineInstr::FrameSetup); + } else { + BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP)) + .setMIFlag(MachineInstr::FrameSetup); } unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; // Set tagged base pointer to the bottom of the stack frame. // Ideally it should match SP value after prologue. AFI->setTaggedBasePointerOffset(MFI.getStackSize()); const StackOffset &SVEStackSize = getSVEStackSize(MF); // getStackSize() includes all the locals in its size calculation. We don't // include these locals when computing the stack size of a funclet, as they // are allocated in the parent's stack frame and accessed via the frame // pointer from the funclet. We only save the callee saved registers in the // funclet, which are really the callee saved registers of the parent // function, including the funclet. int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize(); if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) { assert(!HasFP && "unexpected function without stack frame but with FP"); assert(!SVEStackSize && "unexpected function without stack frame but with SVE objects"); // All of the stack allocation is for locals. AFI->setLocalStackSize(NumBytes); if (!NumBytes) return; // REDZONE: If the stack size is less than 128 bytes, we don't need // to actually allocate. 
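// (The red zone is the 128-byte area below SP that a leaf function may use
// without moving SP; canUseRedZone() checks whether this function qualifies.)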
if (canUseRedZone(MF)) { AFI->setHasRedZone(true); ++NumRedZoneFunctions; } else { emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); if (!NeedsWinCFI && needsFrameMoves) { // Label used to tie together the PROLOG_LABEL and the MachineMoves. MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); // Encode the stack size of the leaf function. unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::cfiDefCfaOffset(FrameLabel, NumBytes)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } } if (NeedsWinCFI) { HasWinCFI = true; BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) .setMIFlag(MachineInstr::FrameSetup); } return; } bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet); auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; // All of the remaining stack allocations are for locals. AFI->setLocalStackSize(NumBytes - PrologueSaveSize); bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); if (CombineSPBump) { assert(!SVEStackSize && "Cannot combine SP bump with SVE"); emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); NumBytes = 0; } else if (PrologueSaveSize != 0) { MBBI = convertCalleeSaveRestoreToSPPrePostIncDec( MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI); NumBytes -= PrologueSaveSize; } assert(NumBytes >= 0 && "Negative stack allocation size!?"); // Move past the saves of the callee-saved registers, fixing up the offsets // and pre-inc if we decided to combine the callee-save and local stack // pointer bump above. MachineBasicBlock::iterator End = MBB.end(); while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) && !IsSVECalleeSave(MBBI)) { if (CombineSPBump) fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI); ++MBBI; } // For funclets the FP belongs to the containing function. if (!IsFunclet && HasFP) { // Only set up FP if we actually need to. int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset(); if (CombineSPBump) FPOffset += AFI->getLocalStackSize(); // Issue sub fp, sp, FPOffset or // mov fp,sp when FPOffset is zero. // Note: All stores of callee-saved registers are marked as "FrameSetup". // This code marks the instruction(s) that set the FP also. emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, {FPOffset, MVT::i8}, TII, MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); } if (windowsRequiresStackProbe(MF, NumBytes)) { uint64_t NumWords = NumBytes >> 4; if (NeedsWinCFI) { HasWinCFI = true; // alloc_l can hold at most 256MB, so assume that NumBytes doesn't // exceed this amount. We need to move at most 2^24 - 1 into x15. // This is at most two instructions, MOVZ follwed by MOVK. // TODO: Fix to use multiple stack alloc unwind codes for stacks // exceeding 256MB in size. 
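// NumWords is NumBytes / 16, so keeping NumBytes below 1 << 28 (256 MB)
// guarantees that NumWords fits in the 24 bits mentioned above.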
if (NumBytes >= (1 << 28)) report_fatal_error("Stack size cannot exceed 256MB for stack " "unwinding purposes"); uint32_t LowNumWords = NumWords & 0xFFFF; BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15) .addImm(LowNumWords) .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) .setMIFlag(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) .setMIFlag(MachineInstr::FrameSetup); if ((NumWords & 0xFFFF0000) != 0) { BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15) .addReg(AArch64::X15) .addImm((NumWords & 0xFFFF0000) >> 16) // High half .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16)) .setMIFlag(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) .setMIFlag(MachineInstr::FrameSetup); } } else { BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15) .addImm(NumWords) .setMIFlags(MachineInstr::FrameSetup); } switch (MF.getTarget().getCodeModel()) { case CodeModel::Tiny: case CodeModel::Small: case CodeModel::Medium: case CodeModel::Kernel: BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) .addExternalSymbol("__chkstk") .addReg(AArch64::X15, RegState::Implicit) .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead) .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead) .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead) .setMIFlags(MachineInstr::FrameSetup); if (NeedsWinCFI) { HasWinCFI = true; BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) .setMIFlag(MachineInstr::FrameSetup); } break; case CodeModel::Large: BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT)) .addReg(AArch64::X16, RegState::Define) .addExternalSymbol("__chkstk") .addExternalSymbol("__chkstk") .setMIFlags(MachineInstr::FrameSetup); if (NeedsWinCFI) { HasWinCFI = true; BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) .setMIFlag(MachineInstr::FrameSetup); } BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF))) .addReg(AArch64::X16, RegState::Kill) .addReg(AArch64::X15, RegState::Implicit | RegState::Define) .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead) .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead) .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead) .setMIFlags(MachineInstr::FrameSetup); if (NeedsWinCFI) { HasWinCFI = true; BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) .setMIFlag(MachineInstr::FrameSetup); } break; } BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP) .addReg(AArch64::SP, RegState::Kill) .addReg(AArch64::X15, RegState::Kill) .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4)) .setMIFlags(MachineInstr::FrameSetup); if (NeedsWinCFI) { HasWinCFI = true; BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) .addImm(NumBytes) .setMIFlag(MachineInstr::FrameSetup); } NumBytes = 0; } StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {}; MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI; // Process the SVE callee-saves to determine what space needs to be // allocated. if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) { // Find callee save instructions in frame. 
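// The SVE callee-save spills (STR_ZXI/STR_PXI flagged as FrameSetup) form a
// contiguous run starting at MBBI; walk past them to find where the run ends.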
CalleeSavesBegin = MBBI; assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction"); while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator()) ++MBBI; CalleeSavesEnd = MBBI; AllocateBefore = {CalleeSavedSize, MVT::nxv1i8}; AllocateAfter = SVEStackSize - AllocateBefore; } // Allocate space for the callee saves (if any). emitFrameOffset(MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII, MachineInstr::FrameSetup); // Finally allocate remaining SVE stack space. emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP, -AllocateAfter, TII, MachineInstr::FrameSetup); // Allocate space for the rest of the frame. if (NumBytes) { // Alignment is required for the parent frame, not the funclet const bool NeedsRealignment = !IsFunclet && RegInfo->needsStackRealignment(MF); unsigned scratchSPReg = AArch64::SP; if (NeedsRealignment) { scratchSPReg = findScratchNonCalleeSaveRegister(&MBB); assert(scratchSPReg != AArch64::NoRegister); } // If we're a leaf function, try using the red zone. if (!canUseRedZone(MF)) // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have // the correct value here, as NumBytes also includes padding bytes, // which shouldn't be counted here. emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); if (NeedsRealignment) { const unsigned NrBitsToZero = Log2(MFI.getMaxAlign()); assert(NrBitsToZero > 1); assert(scratchSPReg != AArch64::SP); // SUB X9, SP, NumBytes // -- X9 is temporary register, so shouldn't contain any live data here, // -- free to use. This is already produced by emitFrameOffset above. // AND SP, X9, 0b11111...0000 // The logical immediates have a non-trivial encoding. The following // formula computes the encoded immediate with all ones but // NrBitsToZero zero bits as least significant bits. uint32_t andMaskEncoded = (1 << 12) // = N | ((64 - NrBitsToZero) << 6) // immr | ((64 - NrBitsToZero - 1) << 0); // imms BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP) .addReg(scratchSPReg, RegState::Kill) .addImm(andMaskEncoded); AFI->setStackRealigned(true); if (NeedsWinCFI) { HasWinCFI = true; BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) .addImm(NumBytes & andMaskEncoded) .setMIFlag(MachineInstr::FrameSetup); } } } // If we need a base pointer, set it up here. It's whatever the value of the // stack pointer is at this point. Any variable size objects will be allocated // after this, so we can still use the base pointer to reference locals. // // FIXME: Clarify FrameSetup flags here. // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is // needed. // For funclets the BP belongs to the containing function. if (!IsFunclet && RegInfo->hasBasePointer(MF)) { TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, false); if (NeedsWinCFI) { HasWinCFI = true; BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) .setMIFlag(MachineInstr::FrameSetup); } } // The very last FrameSetup instruction indicates the end of prologue. Emit a // SEH opcode indicating the prologue end. if (NeedsWinCFI && HasWinCFI) { BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) .setMIFlag(MachineInstr::FrameSetup); } // SEH funclets are passed the frame pointer in X1. If the parent // function uses the base register, then the base register is used // directly, and is not retrieved from X1. 
if (IsFunclet && F.hasPersonalityFn()) { EHPersonality Per = classifyEHPersonality(F.getPersonalityFn()); if (isAsynchronousEHPersonality(Per)) { BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP) .addReg(AArch64::X1) .setMIFlag(MachineInstr::FrameSetup); MBB.addLiveIn(AArch64::X1); } } if (needsFrameMoves) { // An example of the prologue: // // .globl __foo // .align 2 // __foo: // Ltmp0: // .cfi_startproc // .cfi_personality 155, ___gxx_personality_v0 // Leh_func_begin: // .cfi_lsda 16, Lexception33 // // stp xa,bx, [sp, -#offset]! // ... // stp x28, x27, [sp, #offset-32] // stp fp, lr, [sp, #offset-16] // add fp, sp, #offset - 16 // sub sp, sp, #1360 // // The Stack: // +-------------------------------------------+ // 10000 | ........ | ........ | ........ | ........ | // 10004 | ........ | ........ | ........ | ........ | // +-------------------------------------------+ // 10008 | ........ | ........ | ........ | ........ | // 1000c | ........ | ........ | ........ | ........ | // +===========================================+ // 10010 | X28 Register | // 10014 | X28 Register | // +-------------------------------------------+ // 10018 | X27 Register | // 1001c | X27 Register | // +===========================================+ // 10020 | Frame Pointer | // 10024 | Frame Pointer | // +-------------------------------------------+ // 10028 | Link Register | // 1002c | Link Register | // +===========================================+ // 10030 | ........ | ........ | ........ | ........ | // 10034 | ........ | ........ | ........ | ........ | // +-------------------------------------------+ // 10038 | ........ | ........ | ........ | ........ | // 1003c | ........ | ........ | ........ | ........ | // +-------------------------------------------+ // // [sp] = 10030 :: >>initial value<< // sp = 10020 :: stp fp, lr, [sp, #-16]! // fp = sp == 10020 :: mov fp, sp // [sp] == 10020 :: stp x28, x27, [sp, #-16]! // sp == 10010 :: >>final value<< // // The frame pointer (w29) points to address 10020. If we use an offset of // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24 // for w27, and -32 for w28: // // Ltmp1: // .cfi_def_cfa w29, 16 // Ltmp2: // .cfi_offset w30, -8 // Ltmp3: // .cfi_offset w29, -16 // Ltmp4: // .cfi_offset w27, -24 // Ltmp5: // .cfi_offset w28, -32 if (HasFP) { const int OffsetToFirstCalleeSaveFromFP = AFI->getCalleeSaveBaseToFrameRecordOffset() - AFI->getCalleeSavedStackSize(); Register FramePtr = RegInfo->getFrameRegister(MF); // Define the current CFA rule to use the provided FP. unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true); unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } else { unsigned CFIIndex; if (SVEStackSize) { const TargetSubtargetInfo &STI = MF.getSubtarget(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); StackOffset TotalSize = SVEStackSize + StackOffset((int64_t)MFI.getStackSize(), MVT::i8); CFIIndex = MF.addFrameInst(createDefCFAExpressionFromSP(TRI, TotalSize)); } else { // Encode the stack size of the leaf function. 
CFIIndex = MF.addFrameInst( MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize())); } BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } // Now emit the moves for whatever callee saved regs we have (including FP, // LR if those are saved). emitCalleeSavedFrameMoves(MBB, MBBI); } } static void InsertReturnAddressAuth(MachineFunction &MF, MachineBasicBlock &MBB) { - if (!ShouldSignReturnAddress(MF)) + const auto &MFI = *MF.getInfo<AArch64FunctionInfo>(); + if (!MFI.shouldSignReturnAddress()) return; const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); DebugLoc DL; if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); // The AUTIASP instruction assembles to a hint instruction before v8.3a so // this instruction can be safely used for any v8a architecture. // From v8.3a onwards there are optimised authenticate LR and return // instructions, namely RETA{A,B}, that can be used instead. if (Subtarget.hasV8_3aOps() && MBBI != MBB.end() && MBBI->getOpcode() == AArch64::RET_ReallyLR) { BuildMI(MBB, MBBI, DL, - TII->get(ShouldSignWithAKey(MF) ? AArch64::RETAA : AArch64::RETAB)) + TII->get(MFI.shouldSignWithBKey() ? AArch64::RETAB : AArch64::RETAA)) .copyImplicitOps(*MBBI); MBB.erase(MBBI); } else { BuildMI( MBB, MBBI, DL, - TII->get(ShouldSignWithAKey(MF) ? AArch64::AUTIASP : AArch64::AUTIBSP)) + TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP)) .setMIFlag(MachineInstr::FrameDestroy); } } static bool isFuncletReturnInstr(const MachineInstr &MI) { switch (MI.getOpcode()) { default: return false; case AArch64::CATCHRET: case AArch64::CLEANUPRET: return true; } } void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); MachineFrameInfo &MFI = MF.getFrameInfo(); const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL; bool NeedsWinCFI = needsWinCFI(MF); bool HasWinCFI = false; bool IsFunclet = false; auto WinCFI = make_scope_exit([&]() { if (!MF.hasWinCFI()) MF.setHasWinCFI(HasWinCFI); }); if (MBB.end() != MBBI) { DL = MBBI->getDebugLoc(); IsFunclet = isFuncletReturnInstr(*MBBI); } int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize(); AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; // Initial and residual are named for consistency with the prologue. Note that // in the epilogue, the residual adjustment is executed first.
uint64_t ArgumentPopSize = getArgumentPopSize(MF, MBB); // The stack frame should be like below, // // ---------------------- --- // | | | // | BytesInStackArgArea| CalleeArgStackSize // | (NumReusableBytes) | (of tail call) // | | --- // | | | // ---------------------| --- | // | | | | // | CalleeSavedReg | | | // | (CalleeSavedStackSize)| | | // | | | | // ---------------------| | NumBytes // | | StackSize (StackAdjustUp) // | LocalStackSize | | | // | (covering callee | | | // | args) | | | // | | | | // ---------------------- --- --- // // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize // = StackSize + ArgumentPopSize // // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps // it as the 2nd argument of AArch64ISD::TC_RETURN. auto Cleanup = make_scope_exit([&] { InsertReturnAddressAuth(MF, MBB); }); bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet); uint64_t AfterCSRPopSize = ArgumentPopSize; auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; // We cannot rely on the local stack size set in emitPrologue if the function // has funclets, as funclets have different local stack size requirements, and // the current value set in emitPrologue may be that of the containing // function. if (MF.hasEHFunclets()) AFI->setLocalStackSize(NumBytes - PrologueSaveSize); bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes); // Assume we can't combine the last pop with the sp restore. if (!CombineSPBump && PrologueSaveSize != 0) { MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator()); while (AArch64InstrInfo::isSEHInstruction(*Pop)) Pop = std::prev(Pop); // Converting the last ldp to a post-index ldp is valid only if the last // ldp's offset is 0. const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1); // If the offset is 0, convert it to a post-index ldp. if (OffsetOp.getImm() == 0) convertCalleeSaveRestoreToSPPrePostIncDec( MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, false); else { // If not, make sure to emit an add after the last ldp. // We're doing this by transfering the size to be restored from the // adjustment *before* the CSR pops to the adjustment *after* the CSR // pops. AfterCSRPopSize += PrologueSaveSize; } } // Move past the restores of the callee-saved registers. // If we plan on combining the sp bump of the local stack size and the callee // save stack size, we might need to adjust the CSR save and restore offsets. MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator(); MachineBasicBlock::iterator Begin = MBB.begin(); while (LastPopI != Begin) { --LastPopI; if (!LastPopI->getFlag(MachineInstr::FrameDestroy) || IsSVECalleeSave(LastPopI)) { ++LastPopI; break; } else if (CombineSPBump) fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI); } if (NeedsWinCFI) { HasWinCFI = true; BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart)) .setMIFlag(MachineInstr::FrameDestroy); } const StackOffset &SVEStackSize = getSVEStackSize(MF); // If there is a single SP update, insert it before the ret and we're done. 
if (CombineSPBump) { assert(!SVEStackSize && "Cannot combine SP bump with SVE"); emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, {NumBytes + (int64_t)AfterCSRPopSize, MVT::i8}, TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); if (NeedsWinCFI && HasWinCFI) BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) .setMIFlag(MachineInstr::FrameDestroy); return; } NumBytes -= PrologueSaveSize; assert(NumBytes >= 0 && "Negative stack allocation size!?"); // Process the SVE callee-saves to determine what space needs to be // deallocated. StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize; MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI; if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) { RestoreBegin = std::prev(RestoreEnd); while (RestoreBegin != MBB.begin() && IsSVECalleeSave(std::prev(RestoreBegin))) --RestoreBegin; assert(IsSVECalleeSave(RestoreBegin) && IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction"); StackOffset CalleeSavedSizeAsOffset = {CalleeSavedSize, MVT::nxv1i8}; DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset; DeallocateAfter = CalleeSavedSizeAsOffset; } // Deallocate the SVE area. if (SVEStackSize) { if (AFI->isStackRealigned()) { if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) // Set SP to start of SVE callee-save area from which they can // be reloaded. The code below will deallocate the stack space // space by moving FP -> SP. emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP, {-CalleeSavedSize, MVT::nxv1i8}, TII, MachineInstr::FrameDestroy); } else { if (AFI->getSVECalleeSavedStackSize()) { // Deallocate the non-SVE locals first before we can deallocate (and // restore callee saves) from the SVE area. emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP, {NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy); NumBytes = 0; } emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP, DeallocateBefore, TII, MachineInstr::FrameDestroy); emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, DeallocateAfter, TII, MachineInstr::FrameDestroy); } } if (!hasFP(MF)) { bool RedZone = canUseRedZone(MF); // If this was a redzone leaf function, we don't need to restore the // stack pointer (but we may need to pop stack args for fastcc). if (RedZone && AfterCSRPopSize == 0) return; bool NoCalleeSaveRestore = PrologueSaveSize == 0; int64_t StackRestoreBytes = RedZone ? 0 : NumBytes; if (NoCalleeSaveRestore) StackRestoreBytes += AfterCSRPopSize; // If we were able to combine the local stack pop with the argument pop, // then we're done. bool Done = NoCalleeSaveRestore || AfterCSRPopSize == 0; // If we're done after this, make sure to help the load store optimizer. if (Done) adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI); emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, {StackRestoreBytes, MVT::i8}, TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); if (Done) { if (NeedsWinCFI) { HasWinCFI = true; BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) .setMIFlag(MachineInstr::FrameDestroy); } return; } NumBytes = 0; } // Restore the original stack pointer. // FIXME: Rather than doing the math here, we should instead just use // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. 
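// When the frame has variable-sized objects or was realigned, the current SP
// is not a known distance from the frame record, so rebuild SP from FP rather
// than adding NumBytes back to SP.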
if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) { emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, {-AFI->getCalleeSaveBaseToFrameRecordOffset(), MVT::i8}, TII, MachineInstr::FrameDestroy, false, NeedsWinCFI); } else if (NumBytes) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, {NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy, false, NeedsWinCFI); // This must be placed after the callee-save restore code because that code // assumes the SP is at the same location as it was after the callee-save save // code in the prologue. if (AfterCSRPopSize) { // Find an insertion point for the first ldp so that it goes before the // shadow call stack epilog instruction. This ensures that the restore of // lr from x18 is placed after the restore from sp. auto FirstSPPopI = MBB.getFirstTerminator(); while (FirstSPPopI != Begin) { auto Prev = std::prev(FirstSPPopI); if (Prev->getOpcode() != AArch64::LDRXpre || Prev->getOperand(0).getReg() == AArch64::SP) break; FirstSPPopI = Prev; } adaptForLdStOpt(MBB, FirstSPPopI, LastPopI); emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP, {(int64_t)AfterCSRPopSize, MVT::i8}, TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); } if (NeedsWinCFI && HasWinCFI) BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) .setMIFlag(MachineInstr::FrameDestroy); MF.setHasWinCFI(HasWinCFI); } /// getFrameIndexReference - Provide a base+offset reference to an FI slot for /// debug info. It's the same as what we use for resolving the code-gen /// references for now. FIXME: This can go wrong when references are /// SP-relative and simple call frames aren't used. int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const { return resolveFrameIndexReference( MF, FI, FrameReg, /*PreferFP=*/ MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress), /*ForSimm=*/false) .getBytes(); } int AArch64FrameLowering::getNonLocalFrameIndexReference( const MachineFunction &MF, int FI) const { return getSEHFrameIndexOffset(MF, FI); } static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset) { const auto *AFI = MF.getInfo(); const auto &Subtarget = MF.getSubtarget(); bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false); int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo()); int64_t FPAdjust = CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset(); return {ObjectOffset + FixedObject + FPAdjust, MVT::i8}; } static StackOffset getStackOffset(const MachineFunction &MF, int64_t ObjectOffset) { const auto &MFI = MF.getFrameInfo(); return {ObjectOffset + (int64_t)MFI.getStackSize(), MVT::i8}; } int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF, int FI) const { const auto *RegInfo = static_cast( MF.getSubtarget().getRegisterInfo()); int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI); return RegInfo->getLocalAddressRegister(MF) == AArch64::FP ? 
getFPOffset(MF, ObjectOffset).getBytes() : getStackOffset(MF, ObjectOffset).getBytes(); } StackOffset AArch64FrameLowering::resolveFrameIndexReference( const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP, bool ForSimm) const { const auto &MFI = MF.getFrameInfo(); int64_t ObjectOffset = MFI.getObjectOffset(FI); bool isFixed = MFI.isFixedObjectIndex(FI); bool isSVE = MFI.getStackID(FI) == TargetStackID::SVEVector; return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg, PreferFP, ForSimm); } StackOffset AArch64FrameLowering::resolveFrameOffsetReference( const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE, Register &FrameReg, bool PreferFP, bool ForSimm) const { const auto &MFI = MF.getFrameInfo(); const auto *RegInfo = static_cast( MF.getSubtarget().getRegisterInfo()); const auto *AFI = MF.getInfo(); const auto &Subtarget = MF.getSubtarget(); int64_t FPOffset = getFPOffset(MF, ObjectOffset).getBytes(); int64_t Offset = getStackOffset(MF, ObjectOffset).getBytes(); bool isCSR = !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI)); const StackOffset &SVEStackSize = getSVEStackSize(MF); // Use frame pointer to reference fixed objects. Use it for locals if // there are VLAs or a dynamically realigned SP (and thus the SP isn't // reliable as a base). Make sure useFPForScavengingIndex() does the // right thing for the emergency spill slot. bool UseFP = false; if (AFI->hasStackFrame() && !isSVE) { // We shouldn't prefer using the FP when there is an SVE area // in between the FP and the non-SVE locals/spills. PreferFP &= !SVEStackSize; // Note: Keeping the following as multiple 'if' statements rather than // merging to a single expression for readability. // // Argument access should always use the FP. if (isFixed) { UseFP = hasFP(MF); } else if (isCSR && RegInfo->needsStackRealignment(MF)) { // References to the CSR area must use FP if we're re-aligning the stack // since the dynamically-sized alignment padding is between the SP/BP and // the CSR area. assert(hasFP(MF) && "Re-aligned stack must have frame pointer"); UseFP = true; } else if (hasFP(MF) && !RegInfo->needsStackRealignment(MF)) { // If the FPOffset is negative and we're producing a signed immediate, we // have to keep in mind that the available offset range for negative // offsets is smaller than for positive ones. If an offset is available // via the FP and the SP, use whichever is closest. bool FPOffsetFits = !ForSimm || FPOffset >= -256; PreferFP |= Offset > -FPOffset; if (MFI.hasVarSizedObjects()) { // If we have variable sized objects, we can use either FP or BP, as the // SP offset is unknown. We can use the base pointer if we have one and // FP is not preferred. If not, we're stuck with using FP. bool CanUseBP = RegInfo->hasBasePointer(MF); if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best. UseFP = PreferFP; else if (!CanUseBP) // Can't use BP. Forced to use FP. UseFP = true; // else we can use BP and FP, but the offset from FP won't fit. // That will make us scavenge registers which we can probably avoid by // using BP. If it won't fit for BP either, we'll scavenge anyway. } else if (FPOffset >= 0) { // Use SP or FP, whichever gives us the best chance of the offset // being in range for direct access. If the FPOffset is positive, // that'll always be best, as the SP will be even further away. 
UseFP = true; } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) { // Funclets access the locals contained in the parent's stack frame // via the frame pointer, so we have to use the FP in the parent // function. (void) Subtarget; assert( Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) && "Funclets should only be present on Win64"); UseFP = true; } else { // We have the choice between FP and (SP or BP). if (FPOffsetFits && PreferFP) // If FP is the best fit, use it. UseFP = true; } } } assert(((isFixed || isCSR) || !RegInfo->needsStackRealignment(MF) || !UseFP) && "In the presence of dynamic stack pointer realignment, " "non-argument/CSR objects cannot be accessed through the frame pointer"); if (isSVE) { int64_t OffsetFromSPToSVEArea = MFI.getStackSize() - AFI->getCalleeSavedStackSize(); int64_t OffsetFromFPToSVEArea = -AFI->getCalleeSaveBaseToFrameRecordOffset(); StackOffset FPOffset = StackOffset(OffsetFromFPToSVEArea, MVT::i8) + StackOffset(ObjectOffset, MVT::nxv1i8); StackOffset SPOffset = SVEStackSize + StackOffset(ObjectOffset, MVT::nxv1i8) + StackOffset(OffsetFromSPToSVEArea, MVT::i8); // Always use the FP for SVE spills if available and beneficial. if (hasFP(MF) && (SPOffset.getBytes() || FPOffset.getScalableBytes() < SPOffset.getScalableBytes() || RegInfo->needsStackRealignment(MF))) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; } FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister() : (unsigned)AArch64::SP; return SPOffset; } StackOffset ScalableOffset = {}; if (UseFP && !(isFixed || isCSR)) ScalableOffset = -SVEStackSize; if (!UseFP && (isFixed || isCSR)) ScalableOffset = SVEStackSize; if (UseFP) { FrameReg = RegInfo->getFrameRegister(MF); return StackOffset(FPOffset, MVT::i8) + ScalableOffset; } // Use the base pointer if we have one. if (RegInfo->hasBasePointer(MF)) FrameReg = RegInfo->getBaseRegister(); else { assert(!MFI.hasVarSizedObjects() && "Can't use SP when we have var sized objects."); FrameReg = AArch64::SP; // If we're using the red zone for this function, the SP won't actually // be adjusted, so the offsets will be negative. They're also all // within range of the signed 9-bit immediate instructions. if (canUseRedZone(MF)) Offset -= AFI->getLocalStackSize(); } return StackOffset(Offset, MVT::i8) + ScalableOffset; } static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { // Do not set a kill flag on values that are also marked as live-in. This // happens with the @llvm.returnaddress intrinsic and with arguments passed in // callee saved registers. // Omitting the kill flags is conservatively correct even if the live-in // is not used after all. bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg); return getKillRegState(!IsLiveIn); } static bool produceCompactUnwindFrame(MachineFunction &MF) { const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); AttributeList Attrs = MF.getFunction().getAttributes(); return Subtarget.isTargetMachO() && !(Subtarget.getTargetLowering()->supportSwiftError() && Attrs.hasAttrSomewhere(Attribute::SwiftError)); } static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, bool NeedsWinCFI) { // If we are generating register pairs for a Windows function that requires // EH support, then pair consecutive registers only. There are no unwind // opcodes for saves/restores of non-consecutive register pairs. // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_fregp_x.
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling // TODO: LR can be paired with any register. We don't support this yet in // the MCLayer. We need to add support for the save_lrpair unwind code. if (Reg2 == AArch64::FP) return true; if (!NeedsWinCFI) return false; if (Reg2 == Reg1 + 1) return false; return true; } /// Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction. /// WindowsCFI requires that only consecutive registers can be paired. /// LR and FP need to be allocated together when the frame needs to save /// the frame-record. This means any other register pairing with LR is invalid. static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2, bool UsesWinAAPCS, bool NeedsWinCFI, bool NeedsFrameRecord) { if (UsesWinAAPCS) return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI); // If we need to store the frame record, don't pair any register // with LR other than FP. if (NeedsFrameRecord) return Reg2 == AArch64::LR; return false; } namespace { struct RegPairInfo { unsigned Reg1 = AArch64::NoRegister; unsigned Reg2 = AArch64::NoRegister; int FrameIdx; int Offset; enum RegType { GPR, FPR64, FPR128, PPR, ZPR } Type; RegPairInfo() = default; bool isPaired() const { return Reg2 != AArch64::NoRegister; } unsigned getScale() const { switch (Type) { case PPR: return 2; case GPR: case FPR64: return 8; case ZPR: case FPR128: return 16; } llvm_unreachable("Unsupported type"); } bool isScalable() const { return Type == PPR || Type == ZPR; } }; } // end anonymous namespace static void computeCalleeSaveRegisterPairs( MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs, bool &NeedShadowCallStackProlog, bool NeedsFrameRecord) { if (CSI.empty()) return; bool IsWindows = isTargetWindows(MF); bool NeedsWinCFI = needsWinCFI(MF); AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); MachineFrameInfo &MFI = MF.getFrameInfo(); CallingConv::ID CC = MF.getFunction().getCallingConv(); unsigned Count = CSI.size(); (void)CC; // MachO's compact unwind format relies on all registers being stored in // pairs. assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost || (Count & 1) == 0) && "Odd number of callee-saved regs to spill!"); int ByteOffset = AFI->getCalleeSavedStackSize(); int ScalableByteOffset = AFI->getSVECalleeSavedStackSize(); // On Linux, we will have either one or zero non-paired register. On Windows // with CFI, we can have multiple unpaired registers in order to utilize the // available unwind codes. This flag ensures that the alignment fixup is done // only once, as intended. bool FixupDone = false; for (unsigned i = 0; i < Count; ++i) { RegPairInfo RPI; RPI.Reg1 = CSI[i].getReg(); if (AArch64::GPR64RegClass.contains(RPI.Reg1)) RPI.Type = RegPairInfo::GPR; else if (AArch64::FPR64RegClass.contains(RPI.Reg1)) RPI.Type = RegPairInfo::FPR64; else if (AArch64::FPR128RegClass.contains(RPI.Reg1)) RPI.Type = RegPairInfo::FPR128; else if (AArch64::ZPRRegClass.contains(RPI.Reg1)) RPI.Type = RegPairInfo::ZPR; else if (AArch64::PPRRegClass.contains(RPI.Reg1)) RPI.Type = RegPairInfo::PPR; else llvm_unreachable("Unsupported register class."); // Add the next reg to the pair if it is in the same register class.
if (i + 1 < Count) { unsigned NextReg = CSI[i + 1].getReg(); switch (RPI.Type) { case RegPairInfo::GPR: if (AArch64::GPR64RegClass.contains(NextReg) && !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows, NeedsWinCFI, NeedsFrameRecord)) RPI.Reg2 = NextReg; break; case RegPairInfo::FPR64: if (AArch64::FPR64RegClass.contains(NextReg) && !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI)) RPI.Reg2 = NextReg; break; case RegPairInfo::FPR128: if (AArch64::FPR128RegClass.contains(NextReg)) RPI.Reg2 = NextReg; break; case RegPairInfo::PPR: case RegPairInfo::ZPR: break; } } // If either of the registers to be saved is the lr register, it means that // we also need to save lr in the shadow call stack. if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) && MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) { if (!MF.getSubtarget().isXRegisterReserved(18)) report_fatal_error("Must reserve x18 to use shadow call stack"); NeedShadowCallStackProlog = true; } // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI // list to come in sorted by frame index so that we can issue the store // pair instructions directly. Assert if we see anything otherwise. // // The order of the registers in the list is controlled by // getCalleeSavedRegs(), so they will always be in-order, as well. assert((!RPI.isPaired() || (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) && "Out of order callee saved regs!"); assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP || RPI.Reg1 == AArch64::LR) && "FrameRecord must be allocated together with LR"); // Windows AAPCS has FP and LR reversed. assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP || RPI.Reg2 == AArch64::LR) && "FrameRecord must be allocated together with LR"); // MachO's compact unwind format relies on all registers being stored in // adjacent register pairs. assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost || (RPI.isPaired() && ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) || RPI.Reg1 + 1 == RPI.Reg2))) && "Callee-save registers not saved as adjacent register pair!"); RPI.FrameIdx = CSI[i].getFrameIdx(); int Scale = RPI.getScale(); if (RPI.isScalable()) ScalableByteOffset -= Scale; else ByteOffset -= RPI.isPaired() ? 2 * Scale : Scale; assert(!(RPI.isScalable() && RPI.isPaired()) && "Paired spill/fill instructions don't exist for SVE vectors"); // Round up size of non-pair to pair size if we need to pad the // callee-save area to ensure 16-byte alignment. if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone && !RPI.isScalable() && RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) { FixupDone = true; ByteOffset -= 8; assert(ByteOffset % 16 == 0); assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16)); MFI.setObjectAlignment(RPI.FrameIdx, Align(16)); } int Offset = RPI.isScalable() ? ScalableByteOffset : ByteOffset; assert(Offset % Scale == 0); RPI.Offset = Offset / Scale; assert(((!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) || (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) && "Offset out of bounds for LDP/STP immediate"); // Save the offset to frame record so that the FP register can point to the // innermost frame record (spilled FP and LR registers). 
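// The frame record pair appears as (LR, FP) in CSI order, except under the
// Windows AAPCS where FP and LR are reversed; accept both forms below.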
if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) || (IsWindows && RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR))) AFI->setCalleeSaveBaseToFrameRecordOffset(Offset); RegPairs.push_back(RPI); if (RPI.isPaired()) ++i; } } bool AArch64FrameLowering::spillCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef CSI, const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); bool NeedsWinCFI = needsWinCFI(MF); DebugLoc DL; SmallVector RegPairs; bool NeedShadowCallStackProlog = false; computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, NeedShadowCallStackProlog, hasFP(MF)); const MachineRegisterInfo &MRI = MF.getRegInfo(); if (NeedShadowCallStackProlog) { // Shadow call stack prolog: str x30, [x18], #8 BuildMI(MBB, MI, DL, TII.get(AArch64::STRXpost)) .addReg(AArch64::X18, RegState::Define) .addReg(AArch64::LR) .addReg(AArch64::X18) .addImm(8) .setMIFlag(MachineInstr::FrameSetup); if (NeedsWinCFI) BuildMI(MBB, MI, DL, TII.get(AArch64::SEH_Nop)) .setMIFlag(MachineInstr::FrameSetup); if (!MF.getFunction().hasFnAttribute(Attribute::NoUnwind)) { // Emit a CFI instruction that causes 8 to be subtracted from the value of // x18 when unwinding past this frame. static const char CFIInst[] = { dwarf::DW_CFA_val_expression, 18, // register 2, // length static_cast(unsigned(dwarf::DW_OP_breg18)), static_cast(-8) & 0x7f, // addend (sleb128) }; unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape( nullptr, StringRef(CFIInst, sizeof(CFIInst)))); BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlag(MachineInstr::FrameSetup); } // This instruction also makes x18 live-in to the entry block. MBB.addLiveIn(AArch64::X18); } for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE; ++RPII) { RegPairInfo RPI = *RPII; unsigned Reg1 = RPI.Reg1; unsigned Reg2 = RPI.Reg2; unsigned StrOpc; // Issue sequence of spills for cs regs. The first spill may be converted // to a pre-decrement store later by emitPrologue if the callee-save stack // area allocation can't be combined with the local stack area allocation. // For example: // stp x22, x21, [sp, #0] // addImm(+0) // stp x20, x19, [sp, #16] // addImm(+2) // stp fp, lr, [sp, #32] // addImm(+4) // Rationale: This sequence saves uop updates compared to a sequence of // pre-increment spills like stp xi,xj,[sp,#-16]! // Note: Similar rationale and sequence for restores in epilog. unsigned Size; Align Alignment; switch (RPI.Type) { case RegPairInfo::GPR: StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui; Size = 8; Alignment = Align(8); break; case RegPairInfo::FPR64: StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui; Size = 8; Alignment = Align(8); break; case RegPairInfo::FPR128: StrOpc = RPI.isPaired() ? 
AArch64::STPQi : AArch64::STRQui; Size = 16; Alignment = Align(16); break; case RegPairInfo::ZPR: StrOpc = AArch64::STR_ZXI; Size = 16; Alignment = Align(16); break; case RegPairInfo::PPR: StrOpc = AArch64::STR_PXI; Size = 2; Alignment = Align(2); break; } LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI); if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI); dbgs() << ") -> fi#(" << RPI.FrameIdx; if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1; dbgs() << ")\n"); assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) && "Windows unwdinding requires a consecutive (FP,LR) pair"); // Windows unwind codes require consecutive registers if registers are // paired. Make the switch here, so that the code below will save (x,x+1) // and not (x+1,x). unsigned FrameIdxReg1 = RPI.FrameIdx; unsigned FrameIdxReg2 = RPI.FrameIdx + 1; if (NeedsWinCFI && RPI.isPaired()) { std::swap(Reg1, Reg2); std::swap(FrameIdxReg1, FrameIdxReg2); } MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); if (!MRI.isReserved(Reg1)) MBB.addLiveIn(Reg1); if (RPI.isPaired()) { if (!MRI.isReserved(Reg2)) MBB.addLiveIn(Reg2); MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)); MIB.addMemOperand(MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FrameIdxReg2), MachineMemOperand::MOStore, Size, Alignment)); } MIB.addReg(Reg1, getPrologueDeath(MF, Reg1)) .addReg(AArch64::SP) .addImm(RPI.Offset) // [sp, #offset*scale], // where factor*scale is implicit .setMIFlag(MachineInstr::FrameSetup); MIB.addMemOperand(MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FrameIdxReg1), MachineMemOperand::MOStore, Size, Alignment)); if (NeedsWinCFI) InsertSEH(MIB, TII, MachineInstr::FrameSetup); // Update the StackIDs of the SVE stack slots. MachineFrameInfo &MFI = MF.getFrameInfo(); if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR) MFI.setStackID(RPI.FrameIdx, TargetStackID::SVEVector); } return true; } bool AArch64FrameLowering::restoreCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef CSI, const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); DebugLoc DL; SmallVector RegPairs; bool NeedsWinCFI = needsWinCFI(MF); if (MI != MBB.end()) DL = MI->getDebugLoc(); bool NeedShadowCallStackProlog = false; computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, NeedShadowCallStackProlog, hasFP(MF)); auto EmitMI = [&](const RegPairInfo &RPI) { unsigned Reg1 = RPI.Reg1; unsigned Reg2 = RPI.Reg2; // Issue sequence of restores for cs regs. The last restore may be converted // to a post-increment load later by emitEpilogue if the callee-save stack // area allocation can't be combined with the local stack area allocation. // For example: // ldp fp, lr, [sp, #32] // addImm(+4) // ldp x20, x19, [sp, #16] // addImm(+2) // ldp x22, x21, [sp, #0] // addImm(+0) // Note: see comment in spillCalleeSavedRegisters() unsigned LdrOpc; unsigned Size; Align Alignment; switch (RPI.Type) { case RegPairInfo::GPR: LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui; Size = 8; Alignment = Align(8); break; case RegPairInfo::FPR64: LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui; Size = 8; Alignment = Align(8); break; case RegPairInfo::FPR128: LdrOpc = RPI.isPaired() ? 
AArch64::LDPQi : AArch64::LDRQui; Size = 16; Alignment = Align(16); break; case RegPairInfo::ZPR: LdrOpc = AArch64::LDR_ZXI; Size = 16; Alignment = Align(16); break; case RegPairInfo::PPR: LdrOpc = AArch64::LDR_PXI; Size = 2; Alignment = Align(2); break; } LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI); if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI); dbgs() << ") -> fi#(" << RPI.FrameIdx; if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1; dbgs() << ")\n"); // Windows unwind codes require consecutive registers if registers are // paired. Make the switch here, so that the code below will save (x,x+1) // and not (x+1,x). unsigned FrameIdxReg1 = RPI.FrameIdx; unsigned FrameIdxReg2 = RPI.FrameIdx + 1; if (NeedsWinCFI && RPI.isPaired()) { std::swap(Reg1, Reg2); std::swap(FrameIdxReg1, FrameIdxReg2); } MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); if (RPI.isPaired()) { MIB.addReg(Reg2, getDefRegState(true)); MIB.addMemOperand(MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FrameIdxReg2), MachineMemOperand::MOLoad, Size, Alignment)); } MIB.addReg(Reg1, getDefRegState(true)) .addReg(AArch64::SP) .addImm(RPI.Offset) // [sp, #offset*scale] // where factor*scale is implicit .setMIFlag(MachineInstr::FrameDestroy); MIB.addMemOperand(MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FrameIdxReg1), MachineMemOperand::MOLoad, Size, Alignment)); if (NeedsWinCFI) InsertSEH(MIB, TII, MachineInstr::FrameDestroy); }; // SVE objects are always restored in reverse order. for (const RegPairInfo &RPI : reverse(RegPairs)) if (RPI.isScalable()) EmitMI(RPI); if (ReverseCSRRestoreSeq) { for (const RegPairInfo &RPI : reverse(RegPairs)) if (!RPI.isScalable()) EmitMI(RPI); } else for (const RegPairInfo &RPI : RegPairs) if (!RPI.isScalable()) EmitMI(RPI); if (NeedShadowCallStackProlog) { // Shadow call stack epilog: ldr x30, [x18, #-8]! BuildMI(MBB, MI, DL, TII.get(AArch64::LDRXpre)) .addReg(AArch64::X18, RegState::Define) .addReg(AArch64::LR, RegState::Define) .addReg(AArch64::X18) .addImm(-8) .setMIFlag(MachineInstr::FrameDestroy); } return true; } void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); const AArch64RegisterInfo *RegInfo = static_cast( MF.getSubtarget().getRegisterInfo()); const AArch64Subtarget &Subtarget = MF.getSubtarget(); AArch64FunctionInfo *AFI = MF.getInfo(); unsigned UnspilledCSGPR = AArch64::NoRegister; unsigned UnspilledCSGPRPaired = AArch64::NoRegister; MachineFrameInfo &MFI = MF.getFrameInfo(); const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); unsigned BasePointerReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister() : (unsigned)AArch64::NoRegister; unsigned ExtraCSSpill = 0; // Figure out which callee-saved registers to save/restore. for (unsigned i = 0; CSRegs[i]; ++i) { const unsigned Reg = CSRegs[i]; // Add the base pointer register to SavedRegs if it is callee-save. 
if (Reg == BasePointerReg) SavedRegs.set(Reg); bool RegUsed = SavedRegs.test(Reg); unsigned PairedReg = AArch64::NoRegister; if (AArch64::GPR64RegClass.contains(Reg) || AArch64::FPR64RegClass.contains(Reg) || AArch64::FPR128RegClass.contains(Reg)) PairedReg = CSRegs[i ^ 1]; if (!RegUsed) { if (AArch64::GPR64RegClass.contains(Reg) && !RegInfo->isReservedReg(MF, Reg)) { UnspilledCSGPR = Reg; UnspilledCSGPRPaired = PairedReg; } continue; } // MachO's compact unwind format relies on all registers being stored in // pairs. // FIXME: the usual format is actually better if unwinding isn't needed. if (produceCompactUnwindFrame(MF) && PairedReg != AArch64::NoRegister && !SavedRegs.test(PairedReg)) { SavedRegs.set(PairedReg); if (AArch64::GPR64RegClass.contains(PairedReg) && !RegInfo->isReservedReg(MF, PairedReg)) ExtraCSSpill = PairedReg; } } if (MF.getFunction().getCallingConv() == CallingConv::Win64 && !Subtarget.isTargetWindows()) { // For Windows calling convention on a non-windows OS, where X18 is treated // as reserved, back up X18 when entering non-windows code (marked with the // Windows calling convention) and restore when returning regardless of // whether the individual function uses it - it might call other functions // that clobber it. SavedRegs.set(AArch64::X18); } // Calculates the callee saved stack size. unsigned CSStackSize = 0; unsigned SVECSStackSize = 0; const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const MachineRegisterInfo &MRI = MF.getRegInfo(); for (unsigned Reg : SavedRegs.set_bits()) { auto RegSize = TRI->getRegSizeInBits(Reg, MRI) / 8; if (AArch64::PPRRegClass.contains(Reg) || AArch64::ZPRRegClass.contains(Reg)) SVECSStackSize += RegSize; else CSStackSize += RegSize; } // Save number of saved regs, so we can easily update CSStackSize later. unsigned NumSavedRegs = SavedRegs.count(); // The frame record needs to be created by saving the appropriate registers uint64_t EstimatedStackSize = MFI.estimateStackSize(MF); if (hasFP(MF) || windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) { SavedRegs.set(AArch64::FP); SavedRegs.set(AArch64::LR); } LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nSaved CSRs:"; for (unsigned Reg : SavedRegs.set_bits()) dbgs() << ' ' << printReg(Reg, RegInfo); dbgs() << "\n";); // If any callee-saved registers are used, the frame cannot be eliminated. int64_t SVEStackSize = alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16); bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize; // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF); // Conservatively always assume BigStack when there are SVE spills. bool BigStack = SVEStackSize || (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit; if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) AFI->setHasStackFrame(true); // Estimate if we might need to scavenge a register at some point in order // to materialize a stack offset. If so, either spill one additional // callee-saved register or reserve a special spill slot to facilitate // register scavenging. If we already spilled an extra callee-saved register // above to keep the number of spills even, we don't need to do anything else // here. 
if (BigStack) { if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) { LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo) << " to get a scratch register.\n"); SavedRegs.set(UnspilledCSGPR); // MachO's compact unwind format relies on all registers being stored in // pairs, so if we need to spill one extra for BigStack, then we need to // store the pair. if (produceCompactUnwindFrame(MF)) SavedRegs.set(UnspilledCSGPRPaired); ExtraCSSpill = UnspilledCSGPR; } // If we didn't find an extra callee-saved register to spill, create // an emergency spill slot. if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const TargetRegisterClass &RC = AArch64::GPR64RegClass; unsigned Size = TRI->getSpillSize(RC); Align Alignment = TRI->getSpillAlign(RC); int FI = MFI.CreateStackObject(Size, Alignment, false); RS->addScavengingFrameIndex(FI); LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI << " as the emergency spill slot.\n"); } } // Adding the size of additional 64bit GPR saves. CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs); uint64_t AlignedCSStackSize = alignTo(CSStackSize, 16); LLVM_DEBUG(dbgs() << "Estimated stack frame size: " << EstimatedStackSize + AlignedCSStackSize << " bytes.\n"); assert((!MFI.isCalleeSavedInfoValid() || AFI->getCalleeSavedStackSize() == AlignedCSStackSize) && "Should not invalidate callee saved info"); // Round up to register pair alignment to avoid additional SP adjustment // instructions. AFI->setCalleeSavedStackSize(AlignedCSStackSize); AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize); AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16)); } bool AArch64FrameLowering::enableStackSlotScavenging( const MachineFunction &MF) const { const AArch64FunctionInfo *AFI = MF.getInfo(); return AFI->hasCalleeSaveStackFreeSpace(); } /// returns true if there are any SVE callee saves. static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI, int &Min, int &Max) { Min = std::numeric_limits::max(); Max = std::numeric_limits::min(); if (!MFI.isCalleeSavedInfoValid()) return false; const std::vector &CSI = MFI.getCalleeSavedInfo(); for (auto &CS : CSI) { if (AArch64::ZPRRegClass.contains(CS.getReg()) || AArch64::PPRRegClass.contains(CS.getReg())) { assert((Max == std::numeric_limits::min() || Max + 1 == CS.getFrameIdx()) && "SVE CalleeSaves are not consecutive"); Min = std::min(Min, CS.getFrameIdx()); Max = std::max(Max, CS.getFrameIdx()); } } return Min != std::numeric_limits::max(); } // Process all the SVE stack objects and determine offsets for each // object. If AssignOffsets is true, the offsets get assigned. // Fills in the first and last callee-saved frame indices into // Min/MaxCSFrameIndex, respectively. // Returns the size of the stack. static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex, bool AssignOffsets) { #ifndef NDEBUG // First process all fixed stack objects. for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) assert(MFI.getStackID(I) != TargetStackID::SVEVector && "SVE vectors should never be passed on the stack by value, only by " "reference."); #endif auto Assign = [&MFI](int FI, int64_t Offset) { LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n"); MFI.setObjectOffset(FI, Offset); }; int64_t Offset = 0; // Then process all callee saved slots. 
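// SVE callee-save slots are assigned first, so they land at the top of the
// SVE area, next to the non-SVE callee saves; SVE locals and spills go below.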
if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) { // Assign offsets to the callee save slots. for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) { Offset += MFI.getObjectSize(I); Offset = alignTo(Offset, MFI.getObjectAlign(I)); if (AssignOffsets) Assign(I, -Offset); } } // Ensure that the Callee-save area is aligned to 16bytes. Offset = alignTo(Offset, Align(16U)); // Create a buffer of SVE objects to allocate and sort it. SmallVector ObjectsToAllocate; for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) { unsigned StackID = MFI.getStackID(I); if (StackID != TargetStackID::SVEVector) continue; if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex) continue; if (MFI.isDeadObjectIndex(I)) continue; ObjectsToAllocate.push_back(I); } // Allocate all SVE locals and spills for (unsigned FI : ObjectsToAllocate) { Align Alignment = MFI.getObjectAlign(FI); // FIXME: Given that the length of SVE vectors is not necessarily a power of // two, we'd need to align every object dynamically at runtime if the // alignment is larger than 16. This is not yet supported. if (Alignment > Align(16)) report_fatal_error( "Alignment of scalable vectors > 16 bytes is not yet supported"); Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment); if (AssignOffsets) Assign(FI, -Offset); } return Offset; } int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets( MachineFrameInfo &MFI) const { int MinCSFrameIndex, MaxCSFrameIndex; return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false); } int64_t AArch64FrameLowering::assignSVEStackObjectOffsets( MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const { return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, true); } void AArch64FrameLowering::processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo &MFI = MF.getFrameInfo(); assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown && "Upwards growing stack unsupported"); int MinCSFrameIndex, MaxCSFrameIndex; int64_t SVEStackSize = assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex); AArch64FunctionInfo *AFI = MF.getInfo(); AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U)); AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex); // If this function isn't doing Win64-style C++ EH, we don't need to do // anything. if (!MF.hasEHFunclets()) return; const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo(); MachineBasicBlock &MBB = MF.front(); auto MBBI = MBB.begin(); while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) ++MBBI; // Create an UnwindHelp object. // The UnwindHelp object is allocated at the start of the fixed object area int64_t FixedObject = getFixedObjectSize(MF, AFI, /*IsWin64*/ true, /*IsFunclet*/ false); int UnwindHelpFI = MFI.CreateFixedObject(/*Size*/ 8, /*SPOffset*/ -FixedObject, /*IsImmutable=*/false); EHInfo.UnwindHelpFrameIdx = UnwindHelpFI; // We need to store -2 into the UnwindHelp object at the start of the // function. 
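// A simplified model (plain C++, hypothetical types) of the SVE offset
// assignment above: objects are laid out at increasing distance below the
// base of the SVE area, so each object gets the negated running offset after
// its size is added and the total is re-aligned.
#include <cstdint>
#include <vector>

struct SVEObject {
  int64_t Size;
  int64_t Align;   // power of two; capped at 16 in the real code
  int64_t Offset;  // filled in: negative, relative to the area base
};

int64_t assignSVEOffsets(std::vector<SVEObject> &Objects) {
  auto AlignUp = [](int64_t V, int64_t A) { return (V + A - 1) & -A; };
  int64_t Offset = 0;
  for (SVEObject &O : Objects) {
    Offset = AlignUp(Offset + O.Size, O.Align);
    O.Offset = -Offset;
  }
  // The area as a whole is then padded to a 16-byte multiple, as above.
  return AlignUp(Offset, 16);
}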
DebugLoc DL; RS->enterBasicBlockEnd(MBB); RS->backward(std::prev(MBBI)); unsigned DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass); assert(DstReg && "There must be a free register after frame setup"); BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2); BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi)) .addReg(DstReg, getKillRegState(true)) .addFrameIndex(UnwindHelpFI) .addImm(0); } namespace { struct TagStoreInstr { MachineInstr *MI; int64_t Offset, Size; explicit TagStoreInstr(MachineInstr *MI, int64_t Offset, int64_t Size) : MI(MI), Offset(Offset), Size(Size) {} }; class TagStoreEdit { MachineFunction *MF; MachineBasicBlock *MBB; MachineRegisterInfo *MRI; // Tag store instructions that are being replaced. SmallVector TagStores; // Combined memref arguments of the above instructions. SmallVector CombinedMemRefs; // Replace allocation tags in [FrameReg + FrameRegOffset, FrameReg + // FrameRegOffset + Size) with the address tag of SP. Register FrameReg; StackOffset FrameRegOffset; int64_t Size; // If not None, move FrameReg to (FrameReg + FrameRegUpdate) at the end. Optional FrameRegUpdate; // MIFlags for any FrameReg updating instructions. unsigned FrameRegUpdateFlags; // Use zeroing instruction variants. bool ZeroData; DebugLoc DL; void emitUnrolled(MachineBasicBlock::iterator InsertI); void emitLoop(MachineBasicBlock::iterator InsertI); public: TagStoreEdit(MachineBasicBlock *MBB, bool ZeroData) : MBB(MBB), ZeroData(ZeroData) { MF = MBB->getParent(); MRI = &MF->getRegInfo(); } // Add an instruction to be replaced. Instructions must be added in the // ascending order of Offset, and have to be adjacent. void addInstruction(TagStoreInstr I) { assert((TagStores.empty() || TagStores.back().Offset + TagStores.back().Size == I.Offset) && "Non-adjacent tag store instructions."); TagStores.push_back(I); } void clear() { TagStores.clear(); } // Emit equivalent code at the given location, and erase the current set of // instructions. May skip if the replacement is not profitable. May invalidate // the input iterator and replace it with a valid one. void emitCode(MachineBasicBlock::iterator &InsertI, const AArch64FrameLowering *TFI, bool IsLast); }; void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) { const AArch64InstrInfo *TII = MF->getSubtarget().getInstrInfo(); const int64_t kMinOffset = -256 * 16; const int64_t kMaxOffset = 255 * 16; Register BaseReg = FrameReg; int64_t BaseRegOffsetBytes = FrameRegOffset.getBytes(); if (BaseRegOffsetBytes < kMinOffset || BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset) { Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass); emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg, {BaseRegOffsetBytes, MVT::i8}, TII); BaseReg = ScratchReg; BaseRegOffsetBytes = 0; } MachineInstr *LastI = nullptr; while (Size) { int64_t InstrSize = (Size > 16) ? 32 : 16; unsigned Opcode = InstrSize == 16 ? (ZeroData ? AArch64::STZGOffset : AArch64::STGOffset) : (ZeroData ? AArch64::STZ2GOffset : AArch64::ST2GOffset); MachineInstr *I = BuildMI(*MBB, InsertI, DL, TII->get(Opcode)) .addReg(AArch64::SP) .addReg(BaseReg) .addImm(BaseRegOffsetBytes / 16) .setMemRefs(CombinedMemRefs); // A store to [BaseReg, #0] should go last for an opportunity to fold the // final SP adjustment in the epilogue. 
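// Standalone sketch (plain C++, invented names) of the chunking that
// emitUnrolled performs: the tagged range is covered with 32-byte ST2G/STZ2G
// style chunks plus at most one trailing 16-byte chunk, and each chunk keeps
// the byte offset that the real instructions encode divided by 16.
#include <cstdint>
#include <vector>

struct TagChunk {
  int64_t OffsetBytes;  // relative to the (possibly rebased) base register
  int64_t SizeBytes;    // 16 or 32
};

std::vector<TagChunk> chunkTagRange(int64_t BaseRegOffsetBytes, int64_t Size) {
  std::vector<TagChunk> Chunks;
  while (Size > 0) {
    int64_t InstrSize = (Size > 16) ? 32 : 16;
    Chunks.push_back({BaseRegOffsetBytes, InstrSize});
    BaseRegOffsetBytes += InstrSize;
    Size -= InstrSize;
  }
  return Chunks;
}
// For Size = 80 this yields 32 + 32 + 16 bytes; the chunk at offset 0 is the
// one the real code splices to the end so the epilogue SP update can fold.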
if (BaseRegOffsetBytes == 0) LastI = I; BaseRegOffsetBytes += InstrSize; Size -= InstrSize; } if (LastI) MBB->splice(InsertI, MBB, LastI); } void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) { const AArch64InstrInfo *TII = MF->getSubtarget().getInstrInfo(); Register BaseReg = FrameRegUpdate ? FrameReg : MRI->createVirtualRegister(&AArch64::GPR64RegClass); Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass); emitFrameOffset(*MBB, InsertI, DL, BaseReg, FrameReg, FrameRegOffset, TII); int64_t LoopSize = Size; // If the loop size is not a multiple of 32, split off one 16-byte store at // the end to fold BaseReg update into. if (FrameRegUpdate && *FrameRegUpdate) LoopSize -= LoopSize % 32; MachineInstr *LoopI = BuildMI(*MBB, InsertI, DL, TII->get(ZeroData ? AArch64::STZGloop_wback : AArch64::STGloop_wback)) .addDef(SizeReg) .addDef(BaseReg) .addImm(LoopSize) .addReg(BaseReg) .setMemRefs(CombinedMemRefs); if (FrameRegUpdate) LoopI->setFlags(FrameRegUpdateFlags); int64_t ExtraBaseRegUpdate = FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getBytes() - Size) : 0; if (LoopSize < Size) { assert(FrameRegUpdate); assert(Size - LoopSize == 16); // Tag 16 more bytes at BaseReg and update BaseReg. BuildMI(*MBB, InsertI, DL, TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex)) .addDef(BaseReg) .addReg(BaseReg) .addReg(BaseReg) .addImm(1 + ExtraBaseRegUpdate / 16) .setMemRefs(CombinedMemRefs) .setMIFlags(FrameRegUpdateFlags); } else if (ExtraBaseRegUpdate) { // Update BaseReg. BuildMI( *MBB, InsertI, DL, TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri)) .addDef(BaseReg) .addReg(BaseReg) .addImm(std::abs(ExtraBaseRegUpdate)) .addImm(0) .setMIFlags(FrameRegUpdateFlags); } } // Check if *II is a register update that can be merged into STGloop that ends // at (Reg + Size). RemainingOffset is the required adjustment to Reg after the // end of the loop. bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg, int64_t Size, int64_t *TotalOffset) { MachineInstr &MI = *II; if ((MI.getOpcode() == AArch64::ADDXri || MI.getOpcode() == AArch64::SUBXri) && MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) { unsigned Shift = AArch64_AM::getShiftValue(MI.getOperand(3).getImm()); int64_t Offset = MI.getOperand(2).getImm() << Shift; if (MI.getOpcode() == AArch64::SUBXri) Offset = -Offset; int64_t AbsPostOffset = std::abs(Offset - Size); const int64_t kMaxOffset = 0xFFF; // Max encoding for unshifted ADDXri / SUBXri if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) { *TotalOffset = Offset; return true; } } return false; } void mergeMemRefs(const SmallVectorImpl &TSE, SmallVectorImpl &MemRefs) { MemRefs.clear(); for (auto &TS : TSE) { MachineInstr *MI = TS.MI; // An instruction without memory operands may access anything. Be // conservative and return an empty list. 
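// Hedged sketch (plain C++, no LLVM types) of the test in canMergeRegUpdate:
// given the immediate and shift of a trailing ADD/SUB of the base register,
// the fold is allowed only if the leftover adjustment after the tagged range
// still fits an unshifted 12-bit ADD/SUB immediate and is 16-byte aligned.
#include <cstdint>
#include <cstdlib>

bool canFoldBaseRegUpdate(int64_t Imm, unsigned Shift, bool IsSub,
                          int64_t Size, int64_t &TotalOffset) {
  int64_t Offset = Imm << Shift;     // Shift is 0 or 12 for ADD/SUB immediates
  if (IsSub)
    Offset = -Offset;
  int64_t AbsPostOffset = std::llabs(Offset - Size);
  const int64_t kMaxOffset = 0xFFF;  // unshifted ADDXri/SUBXri immediate range
  if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
    TotalOffset = Offset;
    return true;
  }
  return false;
}
// e.g. an epilogue "add sp, sp, #512" after tagging 496 bytes leaves a
// residual 16-byte update, which is representable, so the fold is taken.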
if (MI->memoperands_empty()) { MemRefs.clear(); return; } MemRefs.append(MI->memoperands_begin(), MI->memoperands_end()); } } void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI, const AArch64FrameLowering *TFI, bool IsLast) { if (TagStores.empty()) return; TagStoreInstr &FirstTagStore = TagStores[0]; TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1]; Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size; DL = TagStores[0].MI->getDebugLoc(); Register Reg; FrameRegOffset = TFI->resolveFrameOffsetReference( *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg, /*PreferFP=*/false, /*ForSimm=*/true); FrameReg = Reg; FrameRegUpdate = None; mergeMemRefs(TagStores, CombinedMemRefs); LLVM_DEBUG(dbgs() << "Replacing adjacent STG instructions:\n"; for (const auto &Instr : TagStores) { dbgs() << " " << *Instr.MI; }); // Size threshold where a loop becomes shorter than a linear sequence of // tagging instructions. const int kSetTagLoopThreshold = 176; if (Size < kSetTagLoopThreshold) { if (TagStores.size() < 2) return; emitUnrolled(InsertI); } else { MachineInstr *UpdateInstr = nullptr; int64_t TotalOffset; if (IsLast) { // See if we can merge base register update into the STGloop. // This is done in AArch64LoadStoreOptimizer for "normal" stores, // but STGloop is way too unusual for that, and also it only // realistically happens in function epilogue. Also, STGloop is expanded // before that pass. if (InsertI != MBB->end() && canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getBytes() + Size, &TotalOffset)) { UpdateInstr = &*InsertI++; LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n " << *UpdateInstr); } } if (!UpdateInstr && TagStores.size() < 2) return; if (UpdateInstr) { FrameRegUpdate = TotalOffset; FrameRegUpdateFlags = UpdateInstr->getFlags(); } emitLoop(InsertI); if (UpdateInstr) UpdateInstr->eraseFromParent(); } for (auto &TS : TagStores) TS.MI->eraseFromParent(); } bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset, int64_t &Size, bool &ZeroData) { MachineFunction &MF = *MI.getParent()->getParent(); const MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Opcode = MI.getOpcode(); ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGOffset || Opcode == AArch64::STZ2GOffset); if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) { if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead()) return false; if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI()) return false; Offset = MFI.getObjectOffset(MI.getOperand(3).getIndex()); Size = MI.getOperand(2).getImm(); return true; } if (Opcode == AArch64::STGOffset || Opcode == AArch64::STZGOffset) Size = 16; else if (Opcode == AArch64::ST2GOffset || Opcode == AArch64::STZ2GOffset) Size = 32; else return false; if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI()) return false; Offset = MFI.getObjectOffset(MI.getOperand(1).getIndex()) + 16 * MI.getOperand(2).getImm(); return true; } // Detect a run of memory tagging instructions for adjacent stack frame slots, // and replace them with a shorter instruction sequence: // * replace STG + STG with ST2G // * replace STGloop + STGloop with STGloop // This code needs to run when stack slot offsets are already known, but before // FrameIndex operands in STG instructions are eliminated. 
MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II, const AArch64FrameLowering *TFI, RegScavenger *RS) { bool FirstZeroData; int64_t Size, Offset; MachineInstr &MI = *II; MachineBasicBlock *MBB = MI.getParent(); MachineBasicBlock::iterator NextI = ++II; if (&MI == &MBB->instr_back()) return II; if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData)) return II; SmallVector Instrs; Instrs.emplace_back(&MI, Offset, Size); constexpr int kScanLimit = 10; int Count = 0; for (MachineBasicBlock::iterator E = MBB->end(); NextI != E && Count < kScanLimit; ++NextI) { MachineInstr &MI = *NextI; bool ZeroData; int64_t Size, Offset; // Collect instructions that update memory tags with a FrameIndex operand // and (when applicable) constant size, and whose output registers are dead // (the latter is almost always the case in practice). Since these // instructions effectively have no inputs or outputs, we are free to skip // any non-aliasing instructions in between without tracking used registers. if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) { if (ZeroData != FirstZeroData) break; Instrs.emplace_back(&MI, Offset, Size); continue; } // Only count non-transient, non-tagging instructions toward the scan // limit. if (!MI.isTransient()) ++Count; // Just in case, stop before the epilogue code starts. if (MI.getFlag(MachineInstr::FrameSetup) || MI.getFlag(MachineInstr::FrameDestroy)) break; // Reject anything that may alias the collected instructions. if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects()) break; } // New code will be inserted after the last tagging instruction we've found. MachineBasicBlock::iterator InsertI = Instrs.back().MI; InsertI++; llvm::stable_sort(Instrs, [](const TagStoreInstr &Left, const TagStoreInstr &Right) { return Left.Offset < Right.Offset; }); // Make sure that we don't have any overlapping stores. int64_t CurOffset = Instrs[0].Offset; for (auto &Instr : Instrs) { if (CurOffset > Instr.Offset) return NextI; CurOffset = Instr.Offset + Instr.Size; } // Find contiguous runs of tagged memory and emit shorter instruction // sequencies for them when possible. TagStoreEdit TSE(MBB, FirstZeroData); Optional EndOffset; for (auto &Instr : Instrs) { if (EndOffset && *EndOffset != Instr.Offset) { // Found a gap. TSE.emitCode(InsertI, TFI, /*IsLast = */ false); TSE.clear(); } TSE.addInstruction(Instr); EndOffset = Instr.Offset + Instr.Size; } TSE.emitCode(InsertI, TFI, /*IsLast = */ true); return InsertI; } } // namespace void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced( MachineFunction &MF, RegScavenger *RS = nullptr) const { if (StackTaggingMergeSetTag) for (auto &BB : MF) for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) II = tryMergeAdjacentSTG(II, this, RS); } /// For Win64 AArch64 EH, the offset to the Unwind object is from the SP /// before the update. This is easily retrieved as it is exactly the offset /// that is set in processFunctionBeforeFrameFinalized. 
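// Simplified model (plain C++, hypothetical types) of the grouping step in
// tryMergeAdjacentSTG: after sorting the collected tag stores by offset, any
// overlap aborts the merge, and each maximal gap-free run is what emitCode
// would turn into one combined sequence.
#include <algorithm>
#include <cstdint>
#include <vector>

struct TagRange {
  int64_t Offset;
  int64_t Size;
};

// Returns the contiguous runs, or nothing if two ranges overlap.
std::vector<std::vector<TagRange>> groupAdjacent(std::vector<TagRange> Ranges) {
  std::stable_sort(Ranges.begin(), Ranges.end(),
                   [](const TagRange &L, const TagRange &R) {
                     return L.Offset < R.Offset;
                   });
  std::vector<std::vector<TagRange>> Runs;
  int64_t End = 0;
  bool HaveEnd = false;
  for (const TagRange &R : Ranges) {
    if (HaveEnd && R.Offset < End)
      return {};                      // overlapping stores: bail out
    if (!HaveEnd || R.Offset != End)
      Runs.emplace_back();            // first range, or a gap: start a new run
    Runs.back().push_back(R);
    End = R.Offset + R.Size;
    HaveEnd = true;
  }
  return Runs;
}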
int AArch64FrameLowering::getFrameIndexReferencePreferSP(
    const MachineFunction &MF, int FI, Register &FrameReg,
    bool IgnoreSPUpdates) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  if (IgnoreSPUpdates) {
    LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
                      << MFI.getObjectOffset(FI) << "\n");
    FrameReg = AArch64::SP;
    return MFI.getObjectOffset(FI);
  }

  return getFrameIndexReference(MF, FI, FrameReg);
}

/// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
/// the parent's frame pointer
unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
    const MachineFunction &MF) const {
  return 0;
}

/// Funclets only need to account for space for the callee saved registers,
/// as the locals are accounted for in the parent's stack frame.
unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
    const MachineFunction &MF) const {
  // This is the size of the pushed CSRs.
  unsigned CSSize =
      MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
  // This is the amount of stack a funclet needs to allocate.
  return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
                 getStackAlign());
}

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index fda514b2006c..16820fea0a7d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -1,4952 +1,4953 @@
//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

+#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-isel"

//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}

  StringRef getPassName() const override {
    return "AArch64 Instruction Selection";
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) override; template bool SelectRDVLImm(SDValue N, SDValue &Imm); bool tryMLAV64LaneV128(SDNode *N); bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N); bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { return SelectShiftedRegister(N, false, Reg, Shift); } bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { return SelectShiftedRegister(N, true, Reg, Shift); } bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeIndexed7S(N, 1, Base, OffImm); } bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeIndexed7S(N, 2, Base, OffImm); } bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeIndexed7S(N, 4, Base, OffImm); } bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeIndexed7S(N, 8, Base, OffImm); } bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeIndexed7S(N, 16, Base, OffImm); } bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm); } bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm); } bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeIndexed(N, 1, Base, OffImm); } bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeIndexed(N, 2, Base, OffImm); } bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeIndexed(N, 4, Base, OffImm); } bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeIndexed(N, 8, Base, OffImm); } bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeIndexed(N, 16, Base, OffImm); } bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeUnscaled(N, 1, Base, OffImm); } bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeUnscaled(N, 2, Base, OffImm); } bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeUnscaled(N, 4, Base, OffImm); } bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeUnscaled(N, 8, Base, OffImm); } bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeUnscaled(N, 16, Base, OffImm); } template bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset, SDValue &SignExtend, SDValue &DoShift) { return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift); } template bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset, SDValue &SignExtend, SDValue &DoShift) { return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift); } bool SelectDupZeroOrUndef(SDValue N) { switch(N->getOpcode()) { case ISD::UNDEF: return true; case AArch64ISD::DUP: case ISD::SPLAT_VECTOR: { auto Opnd0 = N->getOperand(0); if (auto CN = dyn_cast(Opnd0)) if (CN->isNullValue()) return 
true; if (auto CN = dyn_cast(Opnd0)) if (CN->isZero()) return true; break; } default: break; } return false; } bool SelectDupZero(SDValue N) { switch(N->getOpcode()) { case AArch64ISD::DUP: case ISD::SPLAT_VECTOR: { auto Opnd0 = N->getOperand(0); if (auto CN = dyn_cast(Opnd0)) if (CN->isNullValue()) return true; if (auto CN = dyn_cast(Opnd0)) if (CN->isZero()) return true; break; } } return false; } template bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) { return SelectSVEAddSubImm(N, VT, Imm, Shift); } template bool SelectSVELogicalImm(SDValue N, SDValue &Imm) { return SelectSVELogicalImm(N, VT, Imm); } template bool SelectSVEShiftImm(SDValue N, SDValue &Imm) { return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm); } // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. template bool SelectCntImm(SDValue N, SDValue &Imm) { if (!isa(N)) return false; int64_t MulImm = cast(N)->getSExtValue(); if (Shift) MulImm = 1LL << MulImm; if ((MulImm % std::abs(Scale)) != 0) return false; MulImm /= Scale; if ((MulImm >= Min) && (MulImm <= Max)) { Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32); return true; } return false; } /// Form sequences of consecutive 64/128-bit registers for use in NEON /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have /// between 1 and 4 elements. If it contains a single element that is returned /// unchanged; otherwise a REG_SEQUENCE value is returned. SDValue createDTuple(ArrayRef Vecs); SDValue createQTuple(ArrayRef Vecs); // Form a sequence of SVE registers for instructions using list of vectors, // e.g. structured loads and stores (ldN, stN). SDValue createZTuple(ArrayRef Vecs); /// Generic helper for the createDTuple/createQTuple /// functions. Those should almost always be called instead. SDValue createTuple(ArrayRef Vecs, const unsigned RegClassIDs[], const unsigned SubRegs[]); void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt); bool tryIndexedLoad(SDNode *N); bool trySelectStackSlotTagP(SDNode *N); void SelectTagP(SDNode *N); void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, unsigned SubRegIdx); void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc, unsigned SubRegIdx); void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale, unsigned Opc_rr, unsigned Opc_ri); bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm); /// SVE Reg+Imm addressing mode. template bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base, SDValue &OffImm); /// SVE Reg+Reg address mode. 
template bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) { return SelectSVERegRegAddrMode(N, Scale, Base, Offset); } void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc); void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale, unsigned Opc_rr, unsigned Opc_ri); std::tuple findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri, const SDValue &OldBase, const SDValue &OldOffset, unsigned Scale); bool tryBitfieldExtractOp(SDNode *N); bool tryBitfieldExtractOpFromSExt(SDNode *N); bool tryBitfieldInsertOp(SDNode *N); bool tryBitfieldInsertInZeroOp(SDNode *N); bool tryShiftAmountMod(SDNode *N); bool tryHighFPExt(SDNode *N); bool tryReadRegister(SDNode *N); bool tryWriteRegister(SDNode *N); // Include the pieces autogenerated from the target description. #include "AArch64GenDAGISel.inc" private: bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, SDValue &Shift); bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base, SDValue &OffImm) { return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm); } bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW, unsigned Size, SDValue &Base, SDValue &OffImm); bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, SDValue &OffImm); bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, SDValue &OffImm); bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base, SDValue &Offset, SDValue &SignExtend, SDValue &DoShift); bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base, SDValue &Offset, SDValue &SignExtend, SDValue &DoShift); bool isWorthFolding(SDValue V) const; bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, SDValue &Offset, SDValue &SignExtend); template bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); } bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width); bool SelectCMP_SWAP(SDNode *N); bool SelectSVE8BitLslImm(SDValue N, SDValue &Imm, SDValue &Shift); bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm); bool SelectSVESignedArithImm(SDValue N, SDValue &Imm); bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High, bool AllowSaturation, SDValue &Imm); bool SelectSVEArithImm(SDValue N, SDValue &Imm); bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base, SDValue &Offset); }; } // end anonymous namespace /// isIntImmediate - This method tests to see if the node is a constant /// operand. If so Imm will receive the 32-bit value. static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { if (const ConstantSDNode *C = dyn_cast(N)) { Imm = C->getZExtValue(); return true; } return false; } // isIntImmediate - This method tests to see if a constant operand. // If so Imm will receive the value. static bool isIntImmediate(SDValue N, uint64_t &Imm) { return isIntImmediate(N.getNode(), Imm); } // isOpcWithIntImmediate - This method tests to see if the node is a specific // opcode and that it has a immediate integer right operand. // If so Imm will receive the 32 bit value. 
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm) { return N->getOpcode() == Opc && isIntImmediate(N->getOperand(1).getNode(), Imm); } bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) { switch(ConstraintID) { default: llvm_unreachable("Unexpected asm memory constraint"); case InlineAsm::Constraint_m: case InlineAsm::Constraint_Q: // We need to make sure that this one operand does not end up in XZR, thus // require the address to be in a PointerRegClass register. const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF); SDLoc dl(Op); SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64); SDValue NewOp = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, Op.getValueType(), Op, RC), 0); OutOps.push_back(NewOp); return false; } return true; } /// SelectArithImmed - Select an immediate value that can be represented as /// a 12-bit value shifted left by either 0 or 12. If so, return true with /// Val set to the 12-bit value and Shift set to the shifter operand. bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift) { // This function is called from the addsub_shifted_imm ComplexPattern, // which lists [imm] as the list of opcode it's interested in, however // we still need to check whether the operand is actually an immediate // here because the ComplexPattern opcode list is only used in // root-level opcode matching. if (!isa(N.getNode())) return false; uint64_t Immed = cast(N.getNode())->getZExtValue(); unsigned ShiftAmt; if (Immed >> 12 == 0) { ShiftAmt = 0; } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { ShiftAmt = 12; Immed = Immed >> 12; } else return false; unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); SDLoc dl(N); Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32); Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32); return true; } /// SelectNegArithImmed - As above, but negates the value before trying to /// select it. bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift) { // This function is called from the addsub_shifted_imm ComplexPattern, // which lists [imm] as the list of opcode it's interested in, however // we still need to check whether the operand is actually an immediate // here because the ComplexPattern opcode list is only used in // root-level opcode matching. if (!isa(N.getNode())) return false; // The immediate operand must be a 24-bit zero-extended immediate. uint64_t Immed = cast(N.getNode())->getZExtValue(); // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" // have the opposite effect on the C flag, so this pattern mustn't match under // those circumstances. if (Immed == 0) return false; if (N.getValueType() == MVT::i32) Immed = ~((uint32_t)Immed) + 1; else Immed = ~Immed + 1ULL; if (Immed & 0xFFFFFFFFFF000000ULL) return false; Immed &= 0xFFFFFFULL; return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val, Shift); } /// getShiftTypeForNode - Translate a shift node to the corresponding /// ShiftType value. 
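// Standalone restatement (plain C++) of the check in SelectArithImmed: an
// AArch64 add/sub immediate is either a plain 12-bit value or a 12-bit value
// shifted left by 12, and nothing else matches.
#include <cstdint>

bool selectArithImmed(uint64_t Immed, uint64_t &Val, unsigned &ShiftAmt) {
  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed >>= 12;
  } else {
    return false;
  }
  Val = Immed;
  return true;
}
// 0x123 -> (0x123, LSL #0); 0x123000 -> (0x123, LSL #12); 0x123456 -> reject.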
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { switch (N.getOpcode()) { default: return AArch64_AM::InvalidShiftExtend; case ISD::SHL: return AArch64_AM::LSL; case ISD::SRL: return AArch64_AM::LSR; case ISD::SRA: return AArch64_AM::ASR; case ISD::ROTR: return AArch64_AM::ROR; } } /// Determine whether it is worth it to fold SHL into the addressing /// mode. static bool isWorthFoldingSHL(SDValue V) { assert(V.getOpcode() == ISD::SHL && "invalid opcode"); // It is worth folding logical shift of up to three places. auto *CSD = dyn_cast(V.getOperand(1)); if (!CSD) return false; unsigned ShiftVal = CSD->getZExtValue(); if (ShiftVal > 3) return false; // Check if this particular node is reused in any non-memory related // operation. If yes, do not try to fold this node into the address // computation, since the computation will be kept. const SDNode *Node = V.getNode(); for (SDNode *UI : Node->uses()) if (!isa(*UI)) for (SDNode *UII : UI->uses()) if (!isa(*UII)) return false; return true; } /// Determine whether it is worth to fold V into an extended register. bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { // Trivial if we are optimizing for code size or if there is only // one use of the value. if (CurDAG->shouldOptForSize() || V.hasOneUse()) return true; // If a subtarget has a fastpath LSL we can fold a logical shift into // the addressing mode and save a cycle. if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V)) return true; if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) { const SDValue LHS = V.getOperand(0); const SDValue RHS = V.getOperand(1); if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS)) return true; if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS)) return true; } // It hurts otherwise, since the value will be reused. return false; } /// SelectShiftedRegister - Select a "shifted register" operand. If the value /// is not shifted, set the Shift operand to default of "LSL 0". The logical /// instructions allow the shifted register to be rotated, but the arithmetic /// instructions do not. The AllowROR parameter specifies whether ROR is /// supported. bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, SDValue &Shift) { AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); if (ShType == AArch64_AM::InvalidShiftExtend) return false; if (!AllowROR && ShType == AArch64_AM::ROR) return false; if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { unsigned BitSize = N.getValueSizeInBits(); unsigned Val = RHS->getZExtValue() & (BitSize - 1); unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); Reg = N.getOperand(0); Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32); return isWorthFolding(N); } return false; } /// getExtendTypeForNode - Translate an extend node to the corresponding /// ExtendType value. 
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { if (N.getOpcode() == ISD::SIGN_EXTEND || N.getOpcode() == ISD::SIGN_EXTEND_INREG) { EVT SrcVT; if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) SrcVT = cast(N.getOperand(1))->getVT(); else SrcVT = N.getOperand(0).getValueType(); if (!IsLoadStore && SrcVT == MVT::i8) return AArch64_AM::SXTB; else if (!IsLoadStore && SrcVT == MVT::i16) return AArch64_AM::SXTH; else if (SrcVT == MVT::i32) return AArch64_AM::SXTW; assert(SrcVT != MVT::i64 && "extend from 64-bits?"); return AArch64_AM::InvalidShiftExtend; } else if (N.getOpcode() == ISD::ZERO_EXTEND || N.getOpcode() == ISD::ANY_EXTEND) { EVT SrcVT = N.getOperand(0).getValueType(); if (!IsLoadStore && SrcVT == MVT::i8) return AArch64_AM::UXTB; else if (!IsLoadStore && SrcVT == MVT::i16) return AArch64_AM::UXTH; else if (SrcVT == MVT::i32) return AArch64_AM::UXTW; assert(SrcVT != MVT::i64 && "extend from 64-bits?"); return AArch64_AM::InvalidShiftExtend; } else if (N.getOpcode() == ISD::AND) { ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); if (!CSD) return AArch64_AM::InvalidShiftExtend; uint64_t AndMask = CSD->getZExtValue(); switch (AndMask) { default: return AArch64_AM::InvalidShiftExtend; case 0xFF: return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; case 0xFFFF: return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; case 0xFFFFFFFF: return AArch64_AM::UXTW; } } return AArch64_AM::InvalidShiftExtend; } // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts. static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { if (DL->getOpcode() != AArch64ISD::DUPLANE16 && DL->getOpcode() != AArch64ISD::DUPLANE32) return false; SDValue SV = DL->getOperand(0); if (SV.getOpcode() != ISD::INSERT_SUBVECTOR) return false; SDValue EV = SV.getOperand(1); if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR) return false; ConstantSDNode *DLidx = cast(DL->getOperand(1).getNode()); ConstantSDNode *EVidx = cast(EV.getOperand(1).getNode()); LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue(); LaneOp = EV.getOperand(0); return true; } // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a // high lane extract. static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, SDValue &LaneOp, int &LaneIdx) { if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) { std::swap(Op0, Op1); if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) return false; } StdOp = Op1; return true; } /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand /// is a lane in the upper half of a 128-bit vector. Recognize and select this /// so that we don't emit unnecessary lane extracts. bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) { SDLoc dl(N); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA. int LaneIdx = -1; // Will hold the lane index. 
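// Sketch (plain C++, illustrative enum) of the AND-mask classification in
// getExtendTypeForNode: masking with 0xFF / 0xFFFF / 0xFFFFFFFF behaves like
// a zero-extend from i8 / i16 / i32, and only the i32 form is usable as a
// load/store extend.
#include <cstdint>

enum class ExtKind { Invalid, UXTB, UXTH, UXTW };

ExtKind classifyAndMask(uint64_t AndMask, bool IsLoadStore) {
  switch (AndMask) {
  case 0xFF:
    return IsLoadStore ? ExtKind::Invalid : ExtKind::UXTB;
  case 0xFFFF:
    return IsLoadStore ? ExtKind::Invalid : ExtKind::UXTH;
  case 0xFFFFFFFF:
    return ExtKind::UXTW;
  default:
    return ExtKind::Invalid;
  }
}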
if (Op1.getOpcode() != ISD::MUL || !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, LaneIdx)) { std::swap(Op0, Op1); if (Op1.getOpcode() != ISD::MUL || !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, LaneIdx)) return false; } SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal }; unsigned MLAOpc = ~0U; switch (N->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unrecognized MLA."); case MVT::v4i16: MLAOpc = AArch64::MLAv4i16_indexed; break; case MVT::v8i16: MLAOpc = AArch64::MLAv8i16_indexed; break; case MVT::v2i32: MLAOpc = AArch64::MLAv2i32_indexed; break; case MVT::v4i32: MLAOpc = AArch64::MLAv4i32_indexed; break; } ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops)); return true; } bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) { SDLoc dl(N); SDValue SMULLOp0; SDValue SMULLOp1; int LaneIdx; if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1, LaneIdx)) return false; SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal }; unsigned SMULLOpc = ~0U; if (IntNo == Intrinsic::aarch64_neon_smull) { switch (N->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unrecognized SMULL."); case MVT::v4i32: SMULLOpc = AArch64::SMULLv4i16_indexed; break; case MVT::v2i64: SMULLOpc = AArch64::SMULLv2i32_indexed; break; } } else if (IntNo == Intrinsic::aarch64_neon_umull) { switch (N->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unrecognized SMULL."); case MVT::v4i32: SMULLOpc = AArch64::UMULLv4i16_indexed; break; case MVT::v2i64: SMULLOpc = AArch64::UMULLv2i32_indexed; break; } } else llvm_unreachable("Unrecognized intrinsic."); ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops)); return true; } /// Instructions that accept extend modifiers like UXTW expect the register /// being extended to be a GPR32, but the incoming DAG might be acting on a /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if /// this is the case. static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { if (N.getValueType() == MVT::i32) return N; SDLoc dl(N); SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i32, N, SubReg); return SDValue(Node, 0); } // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. template bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) { if (!isa(N)) return false; int64_t MulImm = cast(N)->getSExtValue(); if ((MulImm % std::abs(Scale)) == 0) { int64_t RDVLImm = MulImm / Scale; if ((RDVLImm >= Low) && (RDVLImm <= High)) { Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32); return true; } } return false; } /// SelectArithExtendedRegister - Select a "extended register" operand. This /// operand folds in an extend followed by an optional left shift. 
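// Minimal sketch (plain C++, invented name) of the scaled-immediate test in
// SelectRDVLImm: the constant must be an exact multiple of the instruction's
// scale, and the resulting multiplier must sit inside the encodable
// [Low, High] range.
#include <cstdint>
#include <cstdlib>

bool selectVLScaledImm(int64_t MulImm, int Scale, int64_t Low, int64_t High,
                       int64_t &EncodedImm) {
  if (Scale == 0 || (MulImm % std::abs(Scale)) != 0)
    return false;
  int64_t Mult = MulImm / Scale;
  if (Mult < Low || Mult > High)
    return false;
  EncodedImm = Mult;
  return true;
}
// e.g. a constant of 4 with a scale of 2 encodes a multiplier of 2, provided
// 2 lies inside the instruction's [Low, High] window.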
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { unsigned ShiftVal = 0; AArch64_AM::ShiftExtendType Ext; if (N.getOpcode() == ISD::SHL) { ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); if (!CSD) return false; ShiftVal = CSD->getZExtValue(); if (ShiftVal > 4) return false; Ext = getExtendTypeForNode(N.getOperand(0)); if (Ext == AArch64_AM::InvalidShiftExtend) return false; Reg = N.getOperand(0).getOperand(0); } else { Ext = getExtendTypeForNode(N); if (Ext == AArch64_AM::InvalidShiftExtend) return false; Reg = N.getOperand(0); // Don't match if free 32-bit -> 64-bit zext can be used instead. if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode())) return false; } // AArch64 mandates that the RHS of the operation must use the smallest // register class that could contain the size being extended from. Thus, // if we're folding a (sext i8), we need the RHS to be a GPR32, even though // there might not be an actual 32-bit value in the program. We can // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX); Reg = narrowIfNeeded(CurDAG, Reg); Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N), MVT::i32); return isWorthFolding(N); } /// If there's a use of this ADDlow that's not itself a load/store then we'll /// need to create a real ADD instruction from it anyway and there's no point in /// folding it into the mem op. Theoretically, it shouldn't matter, but there's /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding /// leads to duplicated ADRP instructions. static bool isWorthFoldingADDlow(SDValue N) { for (auto Use : N->uses()) { if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && Use->getOpcode() != ISD::ATOMIC_LOAD && Use->getOpcode() != ISD::ATOMIC_STORE) return false; // ldar and stlr have much more restrictive addressing modes (just a // register). if (isStrongerThanMonotonic(cast(Use)->getOrdering())) return false; } return true; } /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit /// immediate" address. The "Size" argument is the size in bytes of the memory /// reference, which determines the scale. bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW, unsigned Size, SDValue &Base, SDValue &OffImm) { SDLoc dl(N); const DataLayout &DL = CurDAG->getDataLayout(); const TargetLowering *TLI = getTargetLowering(); if (N.getOpcode() == ISD::FrameIndex) { int FI = cast(N)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); return true; } // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed // selected here doesn't support labels/immediates, only base+offset. 
if (CurDAG->isBaseWithConstantOffset(N)) { if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { if (IsSignedImm) { int64_t RHSC = RHS->getSExtValue(); unsigned Scale = Log2_32(Size); int64_t Range = 0x1LL << (BW - 1); if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) && RHSC < (Range << Scale)) { Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); } OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); return true; } } else { // unsigned Immediate uint64_t RHSC = RHS->getZExtValue(); unsigned Scale = Log2_32(Size); uint64_t Range = 0x1ULL << BW; if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) { Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); } OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); return true; } } } } // Base only. The address will be materialized into a register before // the memory is accessed. // add x0, Xbase, #offset // stp x1, x2, [x0] Base = N; OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); return true; } /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit /// immediate" address. The "Size" argument is the size in bytes of the memory /// reference, which determines the scale. bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, SDValue &OffImm) { SDLoc dl(N); const DataLayout &DL = CurDAG->getDataLayout(); const TargetLowering *TLI = getTargetLowering(); if (N.getOpcode() == ISD::FrameIndex) { int FI = cast(N)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); return true; } if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) { GlobalAddressSDNode *GAN = dyn_cast(N.getOperand(1).getNode()); Base = N.getOperand(0); OffImm = N.getOperand(1); if (!GAN) return true; if (GAN->getOffset() % Size == 0 && GAN->getGlobal()->getPointerAlignment(DL) >= Size) return true; } if (CurDAG->isBaseWithConstantOffset(N)) { if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { int64_t RHSC = (int64_t)RHS->getZExtValue(); unsigned Scale = Log2_32(Size); if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); } OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); return true; } } } // Before falling back to our general case, check if the unscaled // instructions can handle this. If so, that's preferable. if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) return false; // Base only. The address will be materialized into a register before // the memory is accessed. // add x0, Xbase, #offset // ldr x0, [x0] Base = N; OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); return true; } /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit /// immediate" address. This should only match when there is an offset that /// is not valid for a scaled immediate addressing mode. The "Size" argument /// is the size in bytes of the memory reference, which is needed here to know /// what is valid for a scaled immediate. 
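// Plain-C++ restatement of the reg+imm legality checks above: the scaled
// unsigned form must be size-aligned and fit 12 bits after scaling, while
// the signed BW-bit variant used by SelectAddrModeIndexedBitWidth allows a
// symmetric scaled range around zero.
#include <cstdint>

static unsigned log2u(unsigned Size) {  // Size is a power of two here
  unsigned L = 0;
  while (Size > 1) { Size >>= 1; ++L; }
  return L;
}

bool isLegalScaledUImm12(int64_t Offset, unsigned Size) {
  unsigned Scale = log2u(Size);
  return (Offset & (Size - 1)) == 0 && Offset >= 0 &&
         Offset < (int64_t(0x1000) << Scale);
}

bool isLegalScaledSImm(int64_t Offset, unsigned Size, unsigned BW) {
  unsigned Scale = log2u(Size);
  int64_t Range = int64_t(1) << (BW - 1);
  return (Offset & (Size - 1)) == 0 && Offset >= -(Range << Scale) &&
         Offset < (Range << Scale);
}
// For an 8-byte access, offsets 0..32760 in steps of 8 pass the unsigned
// check; 32768 does not, and the code falls back to base-only addressing.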
bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, SDValue &OffImm) { if (!CurDAG->isBaseWithConstantOffset(N)) return false; if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { int64_t RHSC = RHS->getSExtValue(); // If the offset is valid as a scaled immediate, don't match here. if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size))) return false; if (RHSC >= -256 && RHSC < 256) { Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); const TargetLowering *TLI = getTargetLowering(); Base = CurDAG->getTargetFrameIndex( FI, TLI->getPointerTy(CurDAG->getDataLayout())); } OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64); return true; } } return false; } static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { SDLoc dl(N); SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); SDValue ImpDef = SDValue( CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0); MachineSDNode *Node = CurDAG->getMachineNode( TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg); return SDValue(Node, 0); } /// Check if the given SHL node (\p N), can be used to form an /// extended register for an addressing mode. bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, SDValue &Offset, SDValue &SignExtend) { assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) return false; SDLoc dl(N); if (WantExtend) { AArch64_AM::ShiftExtendType Ext = getExtendTypeForNode(N.getOperand(0), true); if (Ext == AArch64_AM::InvalidShiftExtend) return false; Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, MVT::i32); } else { Offset = N.getOperand(0); SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32); } unsigned LegalShiftVal = Log2_32(Size); unsigned ShiftVal = CSD->getZExtValue(); if (ShiftVal != 0 && ShiftVal != LegalShiftVal) return false; return isWorthFolding(N); } bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base, SDValue &Offset, SDValue &SignExtend, SDValue &DoShift) { if (N.getOpcode() != ISD::ADD) return false; SDValue LHS = N.getOperand(0); SDValue RHS = N.getOperand(1); SDLoc dl(N); // We don't want to match immediate adds here, because they are better lowered // to the register-immediate addressing modes. if (isa(LHS) || isa(RHS)) return false; // Check if this particular node is reused in any non-memory related // operation. If yes, do not try to fold this node into the address // computation, since the computation will be kept. const SDNode *Node = N.getNode(); for (SDNode *UI : Node->uses()) { if (!isa(*UI)) return false; } // Remember if it is worth folding N when it produces extended register. bool IsExtendedRegisterWorthFolding = isWorthFolding(N); // Try to match a shifted extend on the RHS. if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) { Base = LHS; DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); return true; } // Try to match a shifted extend on the LHS. 
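// Sketch (plain C++) of the fallback in SelectAddrModeUnscaled: the 9-bit
// signed "unscaled" form is only chosen when the offset is *not* already
// expressible as a scaled unsigned immediate for this access size.
#include <cstdint>

bool useUnscaledSImm9(int64_t Offset, unsigned Size) {
  unsigned Scale = 0;
  for (unsigned S = Size; S > 1; S >>= 1)
    ++Scale;
  bool ValidScaled = (Offset & (Size - 1)) == 0 && Offset >= 0 &&
                     Offset < (int64_t(0x1000) << Scale);
  if (ValidScaled)
    return false;                     // prefer the scaled encoding
  return Offset >= -256 && Offset < 256;
}
// Offset -8 with an 8-byte access is not valid scaled (it is negative) but
// fits the signed 9-bit range, so the LDUR/STUR-style form is used.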
if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) { Base = RHS; DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); return true; } // There was no shift, whatever else we find. DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32); AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend; // Try to match an unshifted extend on the LHS. if (IsExtendedRegisterWorthFolding && (Ext = getExtendTypeForNode(LHS, true)) != AArch64_AM::InvalidShiftExtend) { Base = RHS; Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, MVT::i32); if (isWorthFolding(LHS)) return true; } // Try to match an unshifted extend on the RHS. if (IsExtendedRegisterWorthFolding && (Ext = getExtendTypeForNode(RHS, true)) != AArch64_AM::InvalidShiftExtend) { Base = LHS; Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, MVT::i32); if (isWorthFolding(RHS)) return true; } return false; } // Check if the given immediate is preferred by ADD. If an immediate can be // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be // encoded by one MOVZ, return true. static bool isPreferredADD(int64_t ImmOff) { // Constant in [0x0, 0xfff] can be encoded in ADD. if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) return true; // Check if it can be encoded in an "ADD LSL #12". if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL) // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant. return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; return false; } bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base, SDValue &Offset, SDValue &SignExtend, SDValue &DoShift) { if (N.getOpcode() != ISD::ADD) return false; SDValue LHS = N.getOperand(0); SDValue RHS = N.getOperand(1); SDLoc DL(N); // Check if this particular node is reused in any non-memory related // operation. If yes, do not try to fold this node into the address // computation, since the computation will be kept. const SDNode *Node = N.getNode(); for (SDNode *UI : Node->uses()) { if (!isa(*UI)) return false; } // Watch out if RHS is a wide immediate, it can not be selected into // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate // instructions like: // MOV X0, WideImmediate // ADD X1, BaseReg, X0 // LDR X2, [X1, 0] // For such situation, using [BaseReg, XReg] addressing mode can save one // ADD/SUB: // MOV X0, WideImmediate // LDR X2, [BaseReg, X0] if (isa(RHS)) { int64_t ImmOff = (int64_t)cast(RHS)->getZExtValue(); unsigned Scale = Log2_32(Size); // Skip the immediate can be selected by load/store addressing mode. // Also skip the immediate can be encoded by a single ADD (SUB is also // checked by using -ImmOff). if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) || isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) return false; SDValue Ops[] = { RHS }; SDNode *MOVI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); SDValue MOVIV = SDValue(MOVI, 0); // This ADD of two X register will be selected into [Reg+Reg] mode. N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV); } // Remember if it is worth folding N when it produces extended register. 
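// Standalone copy (plain C++) of the isPreferredADD reasoning above: a wide
// offset is worth folding into an ADD only if it fits a plain 12-bit
// immediate, or fits "ADD ..., LSL #12" and cannot instead be built with a
// single MOVZ.
#include <cstdint>

bool isPreferredADDImm(int64_t ImmOff) {
  // Fits an unshifted 12-bit ADD/SUB immediate.
  if ((ImmOff & 0xfffffffffffff000LL) == 0)
    return true;
  // Candidate for "ADD ..., LSL #12": only bits 12..23 may be set.
  if ((ImmOff & 0xffffffffff000fffLL) == 0)
    // ...but prefer a single MOVZ when all set bits fall inside one aligned
    // 16-bit chunk (bits 0..15 or bits 16..31).
    return (ImmOff & 0xffffffffff00ffffLL) != 0 &&
           (ImmOff & 0xffffffffffff0fffLL) != 0;
  return false;
}
// 0x123000 straddles the 16-bit boundary and is preferred as ADD + LSL #12,
// while 0xa0000 fits one MOVZ and is rejected here.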
bool IsExtendedRegisterWorthFolding = isWorthFolding(N); // Try to match a shifted extend on the RHS. if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) { Base = LHS; DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); return true; } // Try to match a shifted extend on the LHS. if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) { Base = RHS; DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); return true; } // Match any non-shifted, non-extend, non-immediate add expression. Base = LHS; Offset = RHS; SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32); DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32); // Reg1 + Reg2 is free: no check needed. return true; } SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef Regs) { static const unsigned RegClassIDs[] = { AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, AArch64::dsub2, AArch64::dsub3}; return createTuple(Regs, RegClassIDs, SubRegs); } SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef Regs) { static const unsigned RegClassIDs[] = { AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, AArch64::qsub2, AArch64::qsub3}; return createTuple(Regs, RegClassIDs, SubRegs); } SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef Regs) { static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID, AArch64::ZPR3RegClassID, AArch64::ZPR4RegClassID}; static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1, AArch64::zsub2, AArch64::zsub3}; return createTuple(Regs, RegClassIDs, SubRegs); } SDValue AArch64DAGToDAGISel::createTuple(ArrayRef Regs, const unsigned RegClassIDs[], const unsigned SubRegs[]) { // There's no special register-class for a vector-list of 1 element: it's just // a vector. if (Regs.size() == 1) return Regs[0]; assert(Regs.size() >= 2 && Regs.size() <= 4); SDLoc DL(Regs[0]); SmallVector Ops; // First operand of REG_SEQUENCE is the desired RegClass. Ops.push_back( CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32)); // Then we get pairs of source & subregister-position for the components. for (unsigned i = 0; i < Regs.size(); ++i) { Ops.push_back(Regs[i]); Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32)); } SDNode *N = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); return SDValue(N, 0); } void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt) { SDLoc dl(N); EVT VT = N->getValueType(0); unsigned ExtOff = isExt; // Form a REG_SEQUENCE to force register allocation. unsigned Vec0Off = ExtOff + 1; SmallVector Regs(N->op_begin() + Vec0Off, N->op_begin() + Vec0Off + NumVecs); SDValue RegSeq = createQTuple(Regs); SmallVector Ops; if (isExt) Ops.push_back(N->getOperand(1)); Ops.push_back(RegSeq); Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); } bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) { LoadSDNode *LD = cast(N); if (LD->isUnindexed()) return false; EVT VT = LD->getMemoryVT(); EVT DstVT = N->getValueType(0); ISD::MemIndexedMode AM = LD->getAddressingMode(); bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; // We're not doing validity checking here. 
That was done when checking // if we should mark the load as indexed or not. We're just selecting // the right instruction. unsigned Opcode = 0; ISD::LoadExtType ExtType = LD->getExtensionType(); bool InsertTo64 = false; if (VT == MVT::i64) Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost; else if (VT == MVT::i32) { if (ExtType == ISD::NON_EXTLOAD) Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; else if (ExtType == ISD::SEXTLOAD) Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; else { Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; InsertTo64 = true; // The result of the load is only i32. It's the subreg_to_reg that makes // it into an i64. DstVT = MVT::i32; } } else if (VT == MVT::i16) { if (ExtType == ISD::SEXTLOAD) { if (DstVT == MVT::i64) Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; else Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; } else { Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; InsertTo64 = DstVT == MVT::i64; // The result of the load is only i32. It's the subreg_to_reg that makes // it into an i64. DstVT = MVT::i32; } } else if (VT == MVT::i8) { if (ExtType == ISD::SEXTLOAD) { if (DstVT == MVT::i64) Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; else Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; } else { Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; InsertTo64 = DstVT == MVT::i64; // The result of the load is only i32. It's the subreg_to_reg that makes // it into an i64. DstVT = MVT::i32; } } else if (VT == MVT::f16) { Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; } else if (VT == MVT::bf16) { Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; } else if (VT == MVT::f32) { Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; } else if (VT == MVT::f64 || VT.is64BitVector()) { Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; } else if (VT.is128BitVector()) { Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost; } else return false; SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); ConstantSDNode *OffsetOp = cast(LD->getOffset()); int OffsetVal = (int)OffsetOp->getZExtValue(); SDLoc dl(N); SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); SDValue Ops[] = { Base, Offset, Chain }; SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT, MVT::Other, Ops); // Either way, we're replacing the node, so tell the caller that. SDValue LoadedVal = SDValue(Res, 1); if (InsertTo64) { SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); LoadedVal = SDValue(CurDAG->getMachineNode( AArch64::SUBREG_TO_REG, dl, MVT::i64, CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal, SubReg), 0); } ReplaceUses(SDValue(N, 0), LoadedVal); ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); CurDAG->RemoveDeadNode(N); return true; } void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, unsigned SubRegIdx) { SDLoc dl(N); EVT VT = N->getValueType(0); SDValue Chain = N->getOperand(0); SDValue Ops[] = {N->getOperand(2), // Mem operand; Chain}; const EVT ResTys[] = {MVT::Untyped, MVT::Other}; SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); SDValue SuperReg = SDValue(Ld, 0); for (unsigned i = 0; i < NumVecs; ++i) ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); // Transfer memoperands. 
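// (Re-attaching the original MachineMemOperand keeps the size, alignment and
// alias information of the access visible on the selected machine node.)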
MachineMemOperand *MemOp = cast(N)->getMemOperand(); CurDAG->setNodeMemRefs(cast(Ld), {MemOp}); CurDAG->RemoveDeadNode(N); } void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc, unsigned SubRegIdx) { SDLoc dl(N); EVT VT = N->getValueType(0); SDValue Chain = N->getOperand(0); SDValue Ops[] = {N->getOperand(1), // Mem operand N->getOperand(2), // Incremental Chain}; const EVT ResTys[] = {MVT::i64, // Type of the write back register MVT::Untyped, MVT::Other}; SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); // Update uses of write back register ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); // Update uses of vector list SDValue SuperReg = SDValue(Ld, 1); if (NumVecs == 1) ReplaceUses(SDValue(N, 0), SuperReg); else for (unsigned i = 0; i < NumVecs; ++i) ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); // Update the chain ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); CurDAG->RemoveDeadNode(N); } /// Optimize \param OldBase and \param OldOffset selecting the best addressing /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the /// new Base and an SDValue representing the new offset. std::tuple AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri, const SDValue &OldBase, const SDValue &OldOffset, unsigned Scale) { SDValue NewBase = OldBase; SDValue NewOffset = OldOffset; // Detect a possible Reg+Imm addressing mode. const bool IsRegImm = SelectAddrModeIndexedSVE( N, OldBase, NewBase, NewOffset); // Detect a possible reg+reg addressing mode, but only if we haven't already // detected a Reg+Imm one. const bool IsRegReg = !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset); // Select the instruction. return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset); } void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale, unsigned Opc_ri, unsigned Opc_rr) { assert(Scale < 4 && "Invalid scaling value."); SDLoc DL(N); EVT VT = N->getValueType(0); SDValue Chain = N->getOperand(0); // Optimize addressing mode. SDValue Base, Offset; unsigned Opc; std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( N, Opc_rr, Opc_ri, N->getOperand(2), CurDAG->getTargetConstant(0, DL, MVT::i64), Scale); SDValue Ops[] = {N->getOperand(1), // Predicate Base, // Memory operand Offset, Chain}; const EVT ResTys[] = {MVT::Untyped, MVT::Other}; SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); SDValue SuperReg = SDValue(Load, 0); for (unsigned i = 0; i < NumVecs; ++i) ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( AArch64::zsub0 + i, DL, VT, SuperReg)); // Copy chain unsigned ChainIdx = NumVecs; ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); CurDAG->RemoveDeadNode(N); } void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc) { SDLoc dl(N); EVT VT = N->getOperand(2)->getValueType(0); // Form a REG_SEQUENCE to force register allocation. bool Is128Bit = VT.getSizeInBits() == 128; SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); // Transfer memoperands. 
MachineMemOperand *MemOp = cast(N)->getMemOperand(); CurDAG->setNodeMemRefs(cast(St), {MemOp}); ReplaceNode(N, St); } void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale, unsigned Opc_rr, unsigned Opc_ri) { SDLoc dl(N); // Form a REG_SEQUENCE to force register allocation. SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); SDValue RegSeq = createZTuple(Regs); // Optimize addressing mode. unsigned Opc; SDValue Offset, Base; std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), CurDAG->getTargetConstant(0, dl, MVT::i64), Scale); SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate Base, // address Offset, // offset N->getOperand(0)}; // chain SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); ReplaceNode(N, St); } bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm) { SDLoc dl(N); const DataLayout &DL = CurDAG->getDataLayout(); const TargetLowering *TLI = getTargetLowering(); // Try to match it for the frame address if (auto FINode = dyn_cast(N)) { int FI = FINode->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); return true; } return false; } void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc) { SDLoc dl(N); EVT VT = N->getOperand(2)->getValueType(0); const EVT ResTys[] = {MVT::i64, // Type of the write back register MVT::Other}; // Type for the Chain // Form a REG_SEQUENCE to force register allocation. bool Is128Bit = VT.getSizeInBits() == 128; SmallVector Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 1), // base register N->getOperand(NumVecs + 2), // Incremental N->getOperand(0)}; // Chain SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); ReplaceNode(N, St); } namespace { /// WidenVector - Given a value in the V64 register class, produce the /// equivalent value in the V128 register class. class WidenVector { SelectionDAG &DAG; public: WidenVector(SelectionDAG &DAG) : DAG(DAG) {} SDValue operator()(SDValue V64Reg) { EVT VT = V64Reg.getValueType(); unsigned NarrowSize = VT.getVectorNumElements(); MVT EltTy = VT.getVectorElementType().getSimpleVT(); MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); SDLoc DL(V64Reg); SDValue Undef = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); } }; } // namespace /// NarrowVector - Given a value in the V128 register class, produce the /// equivalent value in the V64 register class. static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { EVT VT = V128Reg.getValueType(); unsigned WideSize = VT.getVectorNumElements(); MVT EltTy = VT.getVectorElementType().getSimpleVT(); MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, V128Reg); } void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc) { SDLoc dl(N); EVT VT = N->getValueType(0); bool Narrow = VT.getSizeInBits() == 64; // Form a REG_SEQUENCE to force register allocation. 
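// The structured NEON load/store instructions operate on a list of
// consecutively numbered vector registers; wrapping the operands in a
// REG_SEQUENCE constrains the register allocator to pick such a tuple.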
SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); if (Narrow) transform(Regs, Regs.begin(), WidenVector(*CurDAG)); SDValue RegSeq = createQTuple(Regs); const EVT ResTys[] = {MVT::Untyped, MVT::Other}; unsigned LaneNo = cast(N->getOperand(NumVecs + 2))->getZExtValue(); SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), N->getOperand(NumVecs + 3), N->getOperand(0)}; SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); SDValue SuperReg = SDValue(Ld, 0); EVT WideVT = RegSeq.getOperand(1)->getValueType(0); static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2, AArch64::qsub3 }; for (unsigned i = 0; i < NumVecs; ++i) { SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); if (Narrow) NV = NarrowVector(NV, *CurDAG); ReplaceUses(SDValue(N, i), NV); } ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); CurDAG->RemoveDeadNode(N); } void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc) { SDLoc dl(N); EVT VT = N->getValueType(0); bool Narrow = VT.getSizeInBits() == 64; // Form a REG_SEQUENCE to force register allocation. SmallVector Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); if (Narrow) transform(Regs, Regs.begin(), WidenVector(*CurDAG)); SDValue RegSeq = createQTuple(Regs); const EVT ResTys[] = {MVT::i64, // Type of the write back register RegSeq->getValueType(0), MVT::Other}; unsigned LaneNo = cast(N->getOperand(NumVecs + 1))->getZExtValue(); SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), // Lane Number N->getOperand(NumVecs + 2), // Base register N->getOperand(NumVecs + 3), // Incremental N->getOperand(0)}; SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); // Update uses of the write back register ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); // Update uses of the vector list SDValue SuperReg = SDValue(Ld, 1); if (NumVecs == 1) { ReplaceUses(SDValue(N, 0), Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); } else { EVT WideVT = RegSeq.getOperand(1)->getValueType(0); static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2, AArch64::qsub3 }; for (unsigned i = 0; i < NumVecs; ++i) { SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); if (Narrow) NV = NarrowVector(NV, *CurDAG); ReplaceUses(SDValue(N, i), NV); } } // Update the Chain ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); CurDAG->RemoveDeadNode(N); } void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc) { SDLoc dl(N); EVT VT = N->getOperand(2)->getValueType(0); bool Narrow = VT.getSizeInBits() == 64; // Form a REG_SEQUENCE to force register allocation. SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); if (Narrow) transform(Regs, Regs.begin(), WidenVector(*CurDAG)); SDValue RegSeq = createQTuple(Regs); unsigned LaneNo = cast(N->getOperand(NumVecs + 2))->getZExtValue(); SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), N->getOperand(NumVecs + 3), N->getOperand(0)}; SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); // Transfer memoperands. MachineMemOperand *MemOp = cast(N)->getMemOperand(); CurDAG->setNodeMemRefs(cast(St), {MemOp}); ReplaceNode(N, St); } void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc) { SDLoc dl(N); EVT VT = N->getOperand(2)->getValueType(0); bool Narrow = VT.getSizeInBits() == 64; // Form a REG_SEQUENCE to force register allocation. 
SmallVector Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); if (Narrow) transform(Regs, Regs.begin(), WidenVector(*CurDAG)); SDValue RegSeq = createQTuple(Regs); const EVT ResTys[] = {MVT::i64, // Type of the write back register MVT::Other}; unsigned LaneNo = cast(N->getOperand(NumVecs + 1))->getZExtValue(); SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), N->getOperand(NumVecs + 2), // Base Register N->getOperand(NumVecs + 3), // Incremental N->getOperand(0)}; SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); // Transfer memoperands. MachineMemOperand *MemOp = cast(N)->getMemOperand(); CurDAG->setNodeMemRefs(cast(St), {MemOp}); ReplaceNode(N, St); } static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern) { assert(N->getOpcode() == ISD::AND && "N must be a AND operation to call this function"); EVT VT = N->getValueType(0); // Here we can test the type of VT and return false when the type does not // match, but since it is done prior to that call in the current context // we turned that into an assert to avoid redundant code. assert((VT == MVT::i32 || VT == MVT::i64) && "Type checking must have been done before calling this function"); // FIXME: simplify-demanded-bits in DAGCombine will probably have // changed the AND node to a 32-bit mask operation. We'll have to // undo that as part of the transform here if we want to catch all // the opportunities. // Currently the NumberOfIgnoredLowBits argument helps to recover // form these situations when matching bigger pattern (bitfield insert). // For unsigned extracts, check for a shift right and mask uint64_t AndImm = 0; if (!isOpcWithIntImmediate(N, ISD::AND, AndImm)) return false; const SDNode *Op0 = N->getOperand(0).getNode(); // Because of simplify-demanded-bits in DAGCombine, the mask may have been // simplified. Try to undo that AndImm |= maskTrailingOnes(NumberOfIgnoredLowBits); // The immediate is a mask of the low bits iff imm & (imm+1) == 0 if (AndImm & (AndImm + 1)) return false; bool ClampMSB = false; uint64_t SrlImm = 0; // Handle the SRL + ANY_EXTEND case. if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { // Extend the incoming operand of the SRL to 64-bit. Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); // Make sure to clamp the MSB so that we preserve the semantics of the // original operations. ClampMSB = true; } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { // If the shift result was truncated, we can still combine them. Opd0 = Op0->getOperand(0).getOperand(0); // Use the type of SRL node. VT = Opd0->getValueType(0); } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) { Opd0 = Op0->getOperand(0); } else if (BiggerPattern) { // Let's pretend a 0 shift right has been performed. // The resulting code will be at least as good as the original one // plus it may expose more opportunities for bitfield insert pattern. // FIXME: Currently we limit this to the bigger pattern, because // some optimizations expect AND and not UBFM. Opd0 = N->getOperand(0); } else return false; // Bail out on large immediates. This happens when no proper // combining/constant folding was performed. 
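// (For a well-formed i32 match such as (x >> 3) & 0x1f, SrlImm is 3 and
// AndImm is 0x1f, so the code below emits UBFMWri with LSB = 3 and
// MSB = 3 + 5 - 1 = 7.)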
if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) { LLVM_DEBUG( (dbgs() << N << ": Found large shift immediate, this should not happen\n")); return false; } LSB = SrlImm; MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes(AndImm) : countTrailingOnes(AndImm)) - 1; if (ClampMSB) // Since we're moving the extend before the right shift operation, we need // to clamp the MSB to make sure we don't shift in undefined bits instead of // the zeros which would get shifted in with the original right shift // operation. MSB = MSB > 31 ? 31 : MSB; Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; return true; } static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms) { assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); EVT VT = N->getValueType(0); unsigned BitWidth = VT.getSizeInBits(); assert((VT == MVT::i32 || VT == MVT::i64) && "Type checking must have been done before calling this function"); SDValue Op = N->getOperand(0); if (Op->getOpcode() == ISD::TRUNCATE) { Op = Op->getOperand(0); VT = Op->getValueType(0); BitWidth = VT.getSizeInBits(); } uint64_t ShiftImm; if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) && !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) return false; unsigned Width = cast(N->getOperand(1))->getVT().getSizeInBits(); if (ShiftImm + Width > BitWidth) return false; Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri; Opd0 = Op.getOperand(0); Immr = ShiftImm; Imms = ShiftImm + Width - 1; return true; } static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB) { // We are looking for the following pattern which basically extracts several // continuous bits from the source value and places it from the LSB of the // destination value, all other bits of the destination value or set to zero: // // Value2 = AND Value, MaskImm // SRL Value2, ShiftImm // // with MaskImm >> ShiftImm to search for the bit width. // // This gets selected into a single UBFM: // // UBFM Value, ShiftImm, BitWide + SrlImm -1 // if (N->getOpcode() != ISD::SRL) return false; uint64_t AndMask = 0; if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask)) return false; Opd0 = N->getOperand(0).getOperand(0); uint64_t SrlImm = 0; if (!isIntImmediate(N->getOperand(1), SrlImm)) return false; // Check whether we really have several bits extract here. unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm)); if (BitWide && isMask_64(AndMask >> SrlImm)) { if (N->getValueType(0) == MVT::i32) Opc = AArch64::UBFMWri; else Opc = AArch64::UBFMXri; LSB = SrlImm; MSB = BitWide + SrlImm - 1; return true; } return false; } static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern) { assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && "N must be a SHR/SRA operation to call this function"); EVT VT = N->getValueType(0); // Here we can test the type of VT and return false when the type does not // match, but since it is done prior to that call in the current context // we turned that into an assert to avoid redundant code. assert((VT == MVT::i32 || VT == MVT::i64) && "Type checking must have been done before calling this function"); // Check for AND + SRL doing several bits extract. if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) return true; // We're looking for a shift of a shift. 
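// For example, for i64 (x << 8) >> 16 (logical) the code below computes
// Immr = 16 - 8 = 8 and Imms = 64 - 8 - 1 = 55, i.e. a UBFMXri that extracts
// 48 bits starting at bit 8.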
uint64_t ShlImm = 0; uint64_t TruncBits = 0; if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) { Opd0 = N->getOperand(0).getOperand(0); } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { // We are looking for a shift of truncate. Truncate from i64 to i32 could // be considered as setting high 32 bits as zero. Our strategy here is to // always generate 64bit UBFM. This consistency will help the CSE pass // later find more redundancy. Opd0 = N->getOperand(0).getOperand(0); TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); VT = Opd0.getValueType(); assert(VT == MVT::i64 && "the promoted type should be i64"); } else if (BiggerPattern) { // Let's pretend a 0 shift left has been performed. // FIXME: Currently we limit this to the bigger pattern case, // because some optimizations expect AND and not UBFM Opd0 = N->getOperand(0); } else return false; // Missing combines/constant folding may have left us with strange // constants. if (ShlImm >= VT.getSizeInBits()) { LLVM_DEBUG( (dbgs() << N << ": Found large shift immediate, this should not happen\n")); return false; } uint64_t SrlImm = 0; if (!isIntImmediate(N->getOperand(1), SrlImm)) return false; assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() && "bad amount in shift node!"); int immr = SrlImm - ShlImm; Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1; // SRA requires a signed extraction if (VT == MVT::i32) Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; else Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; return true; } bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) { assert(N->getOpcode() == ISD::SIGN_EXTEND); EVT VT = N->getValueType(0); EVT NarrowVT = N->getOperand(0)->getValueType(0); if (VT != MVT::i64 || NarrowVT != MVT::i32) return false; uint64_t ShiftImm; SDValue Op = N->getOperand(0); if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) return false; SDLoc dl(N); // Extend the incoming operand of the shift to 64-bits. SDValue Opd0 = Widen(CurDAG, Op.getOperand(0)); unsigned Immr = ShiftImm; unsigned Imms = NarrowVT.getSizeInBits() - 1; SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), CurDAG->getTargetConstant(Imms, dl, VT)}; CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops); return true; } /// Try to form fcvtl2 instructions from a floating-point extend of a high-half /// extract of a subvector. bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) { assert(N->getOpcode() == ISD::FP_EXTEND); // There are 2 forms of fcvtl2 - extend to double or extend to float. SDValue Extract = N->getOperand(0); EVT VT = N->getValueType(0); EVT NarrowVT = Extract.getValueType(); if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) && (VT != MVT::v4f32 || NarrowVT != MVT::v4f16)) return false; // Optionally look past a bitcast. Extract = peekThroughBitcasts(Extract); if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) return false; // Match extract from start of high half index. // Example: v8i16 -> v4i16 means the extract must begin at index 4. unsigned ExtractIndex = Extract.getConstantOperandVal(1); if (ExtractIndex != Extract.getValueType().getVectorNumElements()) return false; auto Opcode = VT == MVT::v2f64 ? 
AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16; CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0)); return true; } static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits = 0, bool BiggerPattern = false) { if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) return false; switch (N->getOpcode()) { default: if (!N->isMachineOpcode()) return false; break; case ISD::AND: return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, NumberOfIgnoredLowBits, BiggerPattern); case ISD::SRL: case ISD::SRA: return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); case ISD::SIGN_EXTEND_INREG: return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms); } unsigned NOpc = N->getMachineOpcode(); switch (NOpc) { default: return false; case AArch64::SBFMWri: case AArch64::UBFMWri: case AArch64::SBFMXri: case AArch64::UBFMXri: Opc = NOpc; Opd0 = N->getOperand(0); Immr = cast(N->getOperand(1).getNode())->getZExtValue(); Imms = cast(N->getOperand(2).getNode())->getZExtValue(); return true; } // Unreachable return false; } bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) { unsigned Opc, Immr, Imms; SDValue Opd0; if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) return false; EVT VT = N->getValueType(0); SDLoc dl(N); // If the bit extract operation is 64bit but the original type is 32bit, we // need to add one EXTRACT_SUBREG. if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64), CurDAG->getTargetConstant(Imms, dl, MVT::i64)}; SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i32, SDValue(BFM, 0), SubReg)); return true; } SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), CurDAG->getTargetConstant(Imms, dl, VT)}; CurDAG->SelectNodeTo(N, Opc, VT, Ops); return true; } /// Does DstMask form a complementary pair with the mask provided by /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking, /// this asks whether DstMask zeroes precisely those bits that will be set by /// the other half. static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT) { assert((VT == MVT::i32 || VT == MVT::i64) && "i32 or i64 mask type expected!"); unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; APInt SignificantDstMask = APInt(BitWidth, DstMask); APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue(); } // Look for bits that will be useful for later uses. // A bit is consider useless as soon as it is dropped and never used // before it as been dropped. // E.g., looking for useful bit of x // 1. y = x & 0x7 // 2. z = y >> 2 // After #1, x useful bits are 0x7, then the useful bits of x, live through // y. // After #2, the useful bits of x are 0x4. // However, if x is used on an unpredicatable instruction, then all its bits // are useful. // E.g. // 1. y = x & 0x7 // 2. z = y >> 2 // 3. 
str x, [@x] static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth) { uint64_t Imm = cast(Op.getOperand(1).getNode())->getZExtValue(); Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); getUsefulBits(Op, UsefulBits, Depth + 1); } static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth) { // inherit the bitwidth value APInt OpUsefulBits(UsefulBits); OpUsefulBits = 1; if (MSB >= Imm) { OpUsefulBits <<= MSB - Imm + 1; --OpUsefulBits; // The interesting part will be in the lower part of the result getUsefulBits(Op, OpUsefulBits, Depth + 1); // The interesting part was starting at Imm in the argument OpUsefulBits <<= Imm; } else { OpUsefulBits <<= MSB + 1; --OpUsefulBits; // The interesting part will be shifted in the result OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm; getUsefulBits(Op, OpUsefulBits, Depth + 1); // The interesting part was at zero in the argument OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm); } UsefulBits &= OpUsefulBits; } static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth) { uint64_t Imm = cast(Op.getOperand(1).getNode())->getZExtValue(); uint64_t MSB = cast(Op.getOperand(2).getNode())->getZExtValue(); getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); } static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth) { uint64_t ShiftTypeAndValue = cast(Op.getOperand(2).getNode())->getZExtValue(); APInt Mask(UsefulBits); Mask.clearAllBits(); Mask.flipAllBits(); if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { // Shift Left uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); Mask <<= ShiftAmt; getUsefulBits(Op, Mask, Depth + 1); Mask.lshrInPlace(ShiftAmt); } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { // Shift Right // We do not handle AArch64_AM::ASR, because the sign will change the // number of useful bits uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); Mask.lshrInPlace(ShiftAmt); getUsefulBits(Op, Mask, Depth + 1); Mask <<= ShiftAmt; } else return; UsefulBits &= Mask; } static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth) { uint64_t Imm = cast(Op.getOperand(2).getNode())->getZExtValue(); uint64_t MSB = cast(Op.getOperand(3).getNode())->getZExtValue(); APInt OpUsefulBits(UsefulBits); OpUsefulBits = 1; APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0); ResultUsefulBits.flipAllBits(); APInt Mask(UsefulBits.getBitWidth(), 0); getUsefulBits(Op, ResultUsefulBits, Depth + 1); if (MSB >= Imm) { // The instruction is a BFXIL. uint64_t Width = MSB - Imm + 1; uint64_t LSB = Imm; OpUsefulBits <<= Width; --OpUsefulBits; if (Op.getOperand(1) == Orig) { // Copy the low bits from the result to bits starting from LSB. Mask = ResultUsefulBits & OpUsefulBits; Mask <<= LSB; } if (Op.getOperand(0) == Orig) // Bits starting from LSB in the input contribute to the result. Mask |= (ResultUsefulBits & ~OpUsefulBits); } else { // The instruction is a BFI. uint64_t Width = MSB + 1; uint64_t LSB = UsefulBits.getBitWidth() - Imm; OpUsefulBits <<= Width; --OpUsefulBits; OpUsefulBits <<= LSB; if (Op.getOperand(1) == Orig) { // Copy the bits from the result to the zero bits. 
Mask = ResultUsefulBits & OpUsefulBits; Mask.lshrInPlace(LSB); } if (Op.getOperand(0) == Orig) Mask |= (ResultUsefulBits & ~OpUsefulBits); } UsefulBits &= Mask; } static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth) { // Users of this node should have already been instruction selected // FIXME: Can we turn that into an assert? if (!UserNode->isMachineOpcode()) return; switch (UserNode->getMachineOpcode()) { default: return; case AArch64::ANDSWri: case AArch64::ANDSXri: case AArch64::ANDWri: case AArch64::ANDXri: // We increment Depth only when we call the getUsefulBits return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, Depth); case AArch64::UBFMWri: case AArch64::UBFMXri: return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); case AArch64::ORRWrs: case AArch64::ORRXrs: if (UserNode->getOperand(1) != Orig) return; return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, Depth); case AArch64::BFMWri: case AArch64::BFMXri: return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); case AArch64::STRBBui: case AArch64::STURBBi: if (UserNode->getOperand(0) != Orig) return; UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff); return; case AArch64::STRHHui: case AArch64::STURHHi: if (UserNode->getOperand(0) != Orig) return; UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff); return; } } static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { if (Depth >= SelectionDAG::MaxRecursionDepth) return; // Initialize UsefulBits if (!Depth) { unsigned Bitwidth = Op.getScalarValueSizeInBits(); // At the beginning, assume every produced bits is useful UsefulBits = APInt(Bitwidth, 0); UsefulBits.flipAllBits(); } APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); for (SDNode *Node : Op.getNode()->uses()) { // A use cannot produce useful bits APInt UsefulBitsForUse = APInt(UsefulBits); getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); UsersUsefulBits |= UsefulBitsForUse; } // UsefulBits contains the produced bits that are meaningful for the // current definition, thus a user cannot make a bit meaningful at // this point UsefulBits &= UsersUsefulBits; } /// Create a machine node performing a notional SHL of Op by ShlAmount. If /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is /// 0, return Op unchanged. static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { if (ShlAmount == 0) return Op; EVT VT = Op.getValueType(); SDLoc dl(Op); unsigned BitWidth = VT.getSizeInBits(); unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; SDNode *ShiftNode; if (ShlAmount > 0) { // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt ShiftNode = CurDAG->getMachineNode( UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); } else { // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 assert(ShlAmount < 0 && "expected right shift"); int ShrAmount = -ShlAmount; ShiftNode = CurDAG->getMachineNode( UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); } return SDValue(ShiftNode, 0); } /// Does this tree qualify as an attempt to move a bitfield into position, /// essentially "(and (shl VAL, N), Mask)". 
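/// For example, (and (shl x, 3), 0xf8) positions the low five bits of x at
/// bit positions 3..7; Src, ShiftAmount and MaskWidth come back as x, 3 and 5.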
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &ShiftAmount, int &MaskWidth) { EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); (void)BitWidth; assert(BitWidth == 32 || BitWidth == 64); KnownBits Known = CurDAG->computeKnownBits(Op); // Non-zero in the sense that they're not provably zero, which is the key // point if we want to use this value uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); // Discard a constant AND mask if present. It's safe because the node will // already have been factored into the computeKnownBits calculation above. uint64_t AndImm; if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) { assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0); Op = Op.getOperand(0); } // Don't match if the SHL has more than one use, since then we'll end up // generating SHL+UBFIZ instead of just keeping SHL+AND. if (!BiggerPattern && !Op.hasOneUse()) return false; uint64_t ShlImm; if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) return false; Op = Op.getOperand(0); if (!isShiftedMask_64(NonZeroBits)) return false; ShiftAmount = countTrailingZeros(NonZeroBits); MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount); // BFI encompasses sufficiently many nodes that it's worth inserting an extra // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL // amount. BiggerPattern is true when this pattern is being matched for BFI, // BiggerPattern is false when this pattern is being matched for UBFIZ, in // which case it is not profitable to insert an extra shift. if (ShlImm - ShiftAmount != 0 && !BiggerPattern) return false; Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); return true; } static bool isShiftedMask(uint64_t Mask, EVT VT) { assert(VT == MVT::i32 || VT == MVT::i64); if (VT == MVT::i32) return isShiftedMask_32(Mask); return isShiftedMask_64(Mask); } // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being // inserted only sets known zero bits. static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); EVT VT = N->getValueType(0); if (VT != MVT::i32 && VT != MVT::i64) return false; unsigned BitWidth = VT.getSizeInBits(); uint64_t OrImm; if (!isOpcWithIntImmediate(N, ISD::OR, OrImm)) return false; // Skip this transformation if the ORR immediate can be encoded in the ORR. // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely // performance neutral. if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth)) return false; uint64_t MaskImm; SDValue And = N->getOperand(0); // Must be a single use AND with an immediate operand. if (!And.hasOneUse() || !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm)) return false; // Compute the Known Zero for the AND as this allows us to catch more general // cases than just looking for AND with imm. KnownBits Known = CurDAG->computeKnownBits(And); // Non-zero in the sense that they're not provably zero, which is the key // point if we want to use this value. uint64_t NotKnownZero = (~Known.Zero).getZExtValue(); // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). if (!isShiftedMask(Known.Zero.getZExtValue(), VT)) return false; // The bits being inserted must only set those bits that are known to be zero. if ((OrImm & NotKnownZero) != 0) { // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't // currently handle this case. 
return false; } // BFI/BFXIL dst, src, #lsb, #width. int LSB = countTrailingOnes(NotKnownZero); int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation(); // BFI/BFXIL is an alias of BFM, so translate to BFM operands. unsigned ImmR = (BitWidth - LSB) % BitWidth; unsigned ImmS = Width - 1; // If we're creating a BFI instruction avoid cases where we need more // instructions to materialize the BFI constant as compared to the original // ORR. A BFXIL will use the same constant as the original ORR, so the code // should be no worse in this case. bool IsBFI = LSB != 0; uint64_t BFIImm = OrImm >> LSB; if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) { // We have a BFI instruction and we know the constant can't be materialized // with a ORR-immediate with the zero register. unsigned OrChunks = 0, BFIChunks = 0; for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) { if (((OrImm >> Shift) & 0xFFFF) != 0) ++OrChunks; if (((BFIImm >> Shift) & 0xFFFF) != 0) ++BFIChunks; } if (BFIChunks > OrChunks) return false; } // Materialize the constant to be inserted. SDLoc DL(N); unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm; SDNode *MOVI = CurDAG->getMachineNode( MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT)); // Create the BFI/BFXIL instruction. SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0), CurDAG->getTargetConstant(ImmR, DL, VT), CurDAG->getTargetConstant(ImmS, DL, VT)}; unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; CurDAG->SelectNodeTo(N, Opc, VT, Ops); return true; } static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG) { assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); EVT VT = N->getValueType(0); if (VT != MVT::i32 && VT != MVT::i64) return false; unsigned BitWidth = VT.getSizeInBits(); // Because of simplify-demanded-bits in DAGCombine, involved masks may not // have the expected shape. Try to undo that. unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); // Given a OR operation, check if we have the following pattern // ubfm c, b, imm, imm2 (or something that does the same jobs, see // isBitfieldExtractOp) // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and // countTrailingZeros(mask2) == imm2 - imm + 1 // f = d | c // if yes, replace the OR instruction with: // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2 // OR is commutative, check all combinations of operand order and values of // BiggerPattern, i.e. // Opd0, Opd1, BiggerPattern=false // Opd1, Opd0, BiggerPattern=false // Opd0, Opd1, BiggerPattern=true // Opd1, Opd0, BiggerPattern=true // Several of these combinations may match, so check with BiggerPattern=false // first since that will produce better results by matching more instructions // and/or inserting fewer extra instructions. 
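// The loop below encodes those four combinations: I % 2 selects the operand
// order and I / 2 selects BiggerPattern, so both BiggerPattern=false pairs are
// tried first.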
for (int I = 0; I < 4; ++I) { SDValue Dst, Src; unsigned ImmR, ImmS; bool BiggerPattern = I / 2; SDValue OrOpd0Val = N->getOperand(I % 2); SDNode *OrOpd0 = OrOpd0Val.getNode(); SDValue OrOpd1Val = N->getOperand((I + 1) % 2); SDNode *OrOpd1 = OrOpd1Val.getNode(); unsigned BFXOpc; int DstLSB, Width; if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, NumberOfIgnoredLowBits, BiggerPattern)) { // Check that the returned opcode is compatible with the pattern, // i.e., same type and zero extended (U and not S) if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || (BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) continue; // Compute the width of the bitfield insertion DstLSB = 0; Width = ImmS - ImmR + 1; // FIXME: This constraint is to catch bitfield insertion we may // want to widen the pattern if we want to grab general bitfied // move case if (Width <= 0) continue; // If the mask on the insertee is correct, we have a BFXIL operation. We // can share the ImmR and ImmS values from the already-computed UBFM. } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val, BiggerPattern, Src, DstLSB, Width)) { ImmR = (BitWidth - DstLSB) % BitWidth; ImmS = Width - 1; } else continue; // Check the second part of the pattern EVT VT = OrOpd1Val.getValueType(); assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand"); // Compute the Known Zero for the candidate of the first operand. // This allows to catch more general case than just looking for // AND with imm. Indeed, simplify-demanded-bits may have removed // the AND instruction because it proves it was useless. KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val); // Check if there is enough room for the second operand to appear // in the first one APInt BitsToBeInserted = APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width); if ((BitsToBeInserted & ~Known.Zero) != 0) continue; // Set the first operand uint64_t Imm; if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) // In that case, we can eliminate the AND Dst = OrOpd1->getOperand(0); else // Maybe the AND has been removed by simplify-demanded-bits // or is useful because it discards more bits Dst = OrOpd1Val; // both parts match SDLoc DL(N); SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT), CurDAG->getTargetConstant(ImmS, DL, VT)}; unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; CurDAG->SelectNodeTo(N, Opc, VT, Ops); return true; } // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted // mask (e.g., 0x000ffff0). uint64_t Mask0Imm, Mask1Imm; SDValue And0 = N->getOperand(0); SDValue And1 = N->getOperand(1); if (And0.hasOneUse() && And1.hasOneUse() && isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) && isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) && APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) && (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) { // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm), // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the // bits to be inserted. 
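// After the (possible) swap below, Src is the operand that was masked by the
// shifted mask Mask1Imm and supplies the inserted field, while Dst provides
// the surrounding bits.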
if (isShiftedMask(Mask0Imm, VT)) { std::swap(And0, And1); std::swap(Mask0Imm, Mask1Imm); } SDValue Src = And1->getOperand(0); SDValue Dst = And0->getOperand(0); unsigned LSB = countTrailingZeros(Mask1Imm); int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation(); // The BFXIL inserts the low-order bits from a source register, so right // shift the needed bits into place. SDLoc DL(N); unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; SDNode *LSR = CurDAG->getMachineNode( ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT), CurDAG->getTargetConstant(BitWidth - 1, DL, VT)); // BFXIL is an alias of BFM, so translate to BFM operands. unsigned ImmR = (BitWidth - LSB) % BitWidth; unsigned ImmS = Width - 1; // Create the BFXIL instruction. SDValue Ops[] = {Dst, SDValue(LSR, 0), CurDAG->getTargetConstant(ImmR, DL, VT), CurDAG->getTargetConstant(ImmS, DL, VT)}; unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; CurDAG->SelectNodeTo(N, Opc, VT, Ops); return true; } return false; } bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) { if (N->getOpcode() != ISD::OR) return false; APInt NUsefulBits; getUsefulBits(SDValue(N, 0), NUsefulBits); // If all bits are not useful, just return UNDEF. if (!NUsefulBits) { CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); return true; } if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG)) return true; return tryBitfieldInsertOpFromOrAndImm(N, CurDAG); } /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the /// equivalent of a left shift by a constant amount followed by an and masking /// out a contiguous set of bits. bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) { if (N->getOpcode() != ISD::AND) return false; EVT VT = N->getValueType(0); if (VT != MVT::i32 && VT != MVT::i64) return false; SDValue Op0; int DstLSB, Width; if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, Op0, DstLSB, Width)) return false; // ImmR is the rotate right amount. unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); // ImmS is the most significant bit of the source to be moved. unsigned ImmS = Width - 1; SDLoc DL(N); SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), CurDAG->getTargetConstant(ImmS, DL, VT)}; unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; CurDAG->SelectNodeTo(N, Opc, VT, Ops); return true; } /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in /// variable shift/rotate instructions. bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { EVT VT = N->getValueType(0); unsigned Opc; switch (N->getOpcode()) { case ISD::ROTR: Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr; break; case ISD::SHL: Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr; break; case ISD::SRL: Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr; break; case ISD::SRA: Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr; break; default: return false; } uint64_t Size; uint64_t Bits; if (VT == MVT::i32) { Bits = 5; Size = 32; } else if (VT == MVT::i64) { Bits = 6; Size = 64; } else return false; SDValue ShiftAmt = N->getOperand(1); SDLoc DL(N); SDValue NewShiftAmt; // Skip over an extend of the shift amount. 
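// The variable shift/rotate instructions only consume the low Bits bits of
// the amount (it is implicitly taken modulo the register width), which is why
// extends, additions of multiples of Size, and sufficiently wide masks on the
// amount can all be looked through.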
if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND || ShiftAmt->getOpcode() == ISD::ANY_EXTEND) ShiftAmt = ShiftAmt->getOperand(0); if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { SDValue Add0 = ShiftAmt->getOperand(0); SDValue Add1 = ShiftAmt->getOperand(1); uint64_t Add0Imm; uint64_t Add1Imm; // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X // to avoid the ADD/SUB. if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) NewShiftAmt = Add0; // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to // generate a NEG instead of a SUB of a constant. else if (ShiftAmt->getOpcode() == ISD::SUB && isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 && (Add0Imm % Size == 0)) { unsigned NegOpc; unsigned ZeroReg; EVT SubVT = ShiftAmt->getValueType(0); if (SubVT == MVT::i32) { NegOpc = AArch64::SUBWrr; ZeroReg = AArch64::WZR; } else { assert(SubVT == MVT::i64); NegOpc = AArch64::SUBXrr; ZeroReg = AArch64::XZR; } SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1); NewShiftAmt = SDValue(Neg, 0); } else return false; } else { // If the shift amount is masked with an AND, check that the mask covers the // bits that are implicitly ANDed off by the above opcodes and if so, skip // the AND. uint64_t MaskImm; if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) && !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm)) return false; if (countTrailingOnes(MaskImm) < Bits) return false; NewShiftAmt = ShiftAmt->getOperand(0); } // Narrow/widen the shift amount to match the size of the shift operation. if (VT == MVT::i32) NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt); else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) { SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32); MachineSDNode *Ext = CurDAG->getMachineNode( AArch64::SUBREG_TO_REG, DL, VT, CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg); NewShiftAmt = SDValue(Ext, 0); } SDValue Ops[] = {N->getOperand(0), NewShiftAmt}; CurDAG->SelectNodeTo(N, Opc, VT, Ops); return true; } bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth) { APFloat FVal(0.0); if (ConstantFPSDNode *CN = dyn_cast(N)) FVal = CN->getValueAPF(); else if (LoadSDNode *LN = dyn_cast(N)) { // Some otherwise illegal constants are allowed in this case. if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || !isa(LN->getOperand(1)->getOperand(1))) return false; ConstantPoolSDNode *CN = dyn_cast(LN->getOperand(1)->getOperand(1)); FVal = cast(CN->getConstVal())->getValueAPF(); } else return false; // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits // is between 1 and 32 for a destination w-register, or 1 and 64 for an // x-register. // // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we // want THIS_NODE to be 2^fbits. This is much easier to deal with using // integers. bool IsExact; // fbits is between 1 and 64 in the worst-case, which means the fmul // could have 2^64 as an actual operand. Need 65 bits of precision. APSInt IntVal(65, true); FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); // N.b. isPowerOf2 also checks for > 0. 
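// For example, an fmul by 65536.0 converts exactly to 2^16, so FBits below
// becomes 16 and the fixed-point convert gets #16 as its fbits operand.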
if (!IsExact || !IntVal.isPowerOf2()) return false; unsigned FBits = IntVal.logBase2(); // Checks above should have guaranteed that we haven't lost information in // finding FBits, but it must still be in range. if (FBits == 0 || FBits > RegWidth) return false; FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32); return true; } // Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields // of the string and obtains the integer values from them and combines these // into a single value to be used in the MRS/MSR instruction. static int getIntOperandFromRegisterString(StringRef RegString) { SmallVector Fields; RegString.split(Fields, ':'); if (Fields.size() == 1) return -1; assert(Fields.size() == 5 && "Invalid number of fields in read register string"); SmallVector Ops; bool AllIntFields = true; for (StringRef Field : Fields) { unsigned IntField; AllIntFields &= !Field.getAsInteger(10, IntField); Ops.push_back(IntField); } assert(AllIntFields && "Unexpected non-integer value in special register string."); // Need to combine the integer fields of the string into a single value // based on the bit encoding of MRS/MSR instruction. return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) | (Ops[3] << 3) | (Ops[4]); } // Lower the read_register intrinsic to an MRS instruction node if the special // register string argument is either of the form detailed in the ALCE (the // form described in getIntOperandsFromRegsterString) or is a named register // known by the MRS SysReg mapper. bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) { const MDNodeSDNode *MD = dyn_cast(N->getOperand(1)); const MDString *RegString = dyn_cast(MD->getMD()->getOperand(0)); SDLoc DL(N); int Reg = getIntOperandFromRegisterString(RegString->getString()); if (Reg != -1) { ReplaceNode(N, CurDAG->getMachineNode( AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32), N->getOperand(0))); return true; } // Use the sysreg mapper to map the remaining possible strings to the // value for the register to be used for the instruction operand. auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); if (TheReg && TheReg->Readable && TheReg->haveFeatures(Subtarget->getFeatureBits())) Reg = TheReg->Encoding; else Reg = AArch64SysReg::parseGenericRegister(RegString->getString()); if (Reg != -1) { ReplaceNode(N, CurDAG->getMachineNode( AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32), N->getOperand(0))); return true; } if (RegString->getString() == "pc") { ReplaceNode(N, CurDAG->getMachineNode( AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other, CurDAG->getTargetConstant(0, DL, MVT::i32), N->getOperand(0))); return true; } return false; } // Lower the write_register intrinsic to an MSR instruction node if the special // register string argument is either of the form detailed in the ALCE (the // form described in getIntOperandsFromRegsterString) or is a named register // known by the MSR SysReg mapper. 
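// The colon-separated form is packed by getIntOperandFromRegisterString as
// (o0 << 14) | (op1 << 11) | (CRn << 7) | (CRm << 3) | op2 before being used
// as the MSR operand.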
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { const MDNodeSDNode *MD = dyn_cast(N->getOperand(1)); const MDString *RegString = dyn_cast(MD->getMD()->getOperand(0)); SDLoc DL(N); int Reg = getIntOperandFromRegisterString(RegString->getString()); if (Reg != -1) { ReplaceNode( N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32), N->getOperand(2), N->getOperand(0))); return true; } // Check if the register was one of those allowed as the pstatefield value in // the MSR (immediate) instruction. To accept the values allowed in the // pstatefield for the MSR (immediate) instruction, we also require that an // immediate value has been provided as an argument, we know that this is // the case as it has been ensured by semantic checking. auto PMapper = AArch64PState::lookupPStateByName(RegString->getString()); if (PMapper) { assert (isa(N->getOperand(2)) && "Expected a constant integer expression."); unsigned Reg = PMapper->Encoding; uint64_t Immed = cast(N->getOperand(2))->getZExtValue(); unsigned State; if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) { assert(Immed < 2 && "Bad imm"); State = AArch64::MSRpstateImm1; } else { assert(Immed < 16 && "Bad imm"); State = AArch64::MSRpstateImm4; } ReplaceNode(N, CurDAG->getMachineNode( State, DL, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32), CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0))); return true; } // Use the sysreg mapper to attempt to map the remaining possible strings // to the value for the register to be used for the MSR (register) // instruction operand. auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); if (TheReg && TheReg->Writeable && TheReg->haveFeatures(Subtarget->getFeatureBits())) Reg = TheReg->Encoding; else Reg = AArch64SysReg::parseGenericRegister(RegString->getString()); if (Reg != -1) { ReplaceNode(N, CurDAG->getMachineNode( AArch64::MSR, DL, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32), N->getOperand(2), N->getOperand(0))); return true; } return false; } /// We've got special pseudo-instructions for these bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { unsigned Opcode; EVT MemTy = cast(N)->getMemoryVT(); // Leave IR for LSE if subtarget supports it. if (Subtarget->hasLSE()) return false; if (MemTy == MVT::i8) Opcode = AArch64::CMP_SWAP_8; else if (MemTy == MVT::i16) Opcode = AArch64::CMP_SWAP_16; else if (MemTy == MVT::i32) Opcode = AArch64::CMP_SWAP_32; else if (MemTy == MVT::i64) Opcode = AArch64::CMP_SWAP_64; else llvm_unreachable("Unknown AtomicCmpSwap type"); MVT RegTy = MemTy == MVT::i64 ? 
MVT::i64 : MVT::i32; SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), N->getOperand(0)}; SDNode *CmpSwap = CurDAG->getMachineNode( Opcode, SDLoc(N), CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops); MachineMemOperand *MemOp = cast(N)->getMemOperand(); CurDAG->setNodeMemRefs(cast(CmpSwap), {MemOp}); ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); CurDAG->RemoveDeadNode(N); return true; } bool AArch64DAGToDAGISel::SelectSVE8BitLslImm(SDValue N, SDValue &Base, SDValue &Offset) { auto C = dyn_cast(N); if (!C) return false; auto Ty = N->getValueType(0); int64_t Imm = C->getSExtValue(); SDLoc DL(N); if ((Imm >= -128) && (Imm <= 127)) { Base = CurDAG->getTargetConstant(Imm, DL, Ty); Offset = CurDAG->getTargetConstant(0, DL, Ty); return true; } if (((Imm % 256) == 0) && (Imm >= -32768) && (Imm <= 32512)) { Base = CurDAG->getTargetConstant(Imm/256, DL, Ty); Offset = CurDAG->getTargetConstant(8, DL, Ty); return true; } return false; } bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift) { if (auto CNode = dyn_cast(N)) { const int64_t ImmVal = CNode->getZExtValue(); SDLoc DL(N); switch (VT.SimpleTy) { case MVT::i8: if ((ImmVal & 0xFF) == ImmVal) { Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); return true; } break; case MVT::i16: case MVT::i32: case MVT::i64: if ((ImmVal & 0xFF) == ImmVal) { Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); return true; } else if ((ImmVal & 0xFF00) == ImmVal) { Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); Imm = CurDAG->getTargetConstant(ImmVal >> 8, DL, MVT::i32); return true; } break; default: break; } } return false; } bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { if (auto CNode = dyn_cast(N)) { int64_t ImmVal = CNode->getSExtValue(); SDLoc DL(N); if (ImmVal >= -128 && ImmVal < 128) { Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); return true; } } return false; } bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, SDValue &Imm) { if (auto CNode = dyn_cast(N)) { uint64_t ImmVal = CNode->getSExtValue(); SDLoc DL(N); ImmVal = ImmVal & 0xFF; if (ImmVal < 256) { Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); return true; } } return false; } bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm) { if (auto CNode = dyn_cast(N)) { uint64_t ImmVal = CNode->getZExtValue(); SDLoc DL(N); // Shift mask depending on type size. switch (VT.SimpleTy) { case MVT::i8: ImmVal &= 0xFF; ImmVal |= ImmVal << 8; ImmVal |= ImmVal << 16; ImmVal |= ImmVal << 32; break; case MVT::i16: ImmVal &= 0xFFFF; ImmVal |= ImmVal << 16; ImmVal |= ImmVal << 32; break; case MVT::i32: ImmVal &= 0xFFFFFFFF; ImmVal |= ImmVal << 32; break; case MVT::i64: break; default: llvm_unreachable("Unexpected type"); } uint64_t encoding; if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) { Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64); return true; } } return false; } // SVE shift intrinsics allow shift amounts larger than the element's bitwidth. // Rather than attempt to normalise everything we can sometimes saturate the // shift amount during selection. This function also allows for consistent // isel patterns by ensuring the resulting "Imm" node is of the i32 type // required by the instructions. 
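// For example, with Low = 1, High = 8 and AllowSaturation = true, a requested
// shift of 19 is clamped and emitted as an immediate of 8; with
// AllowSaturation = false the same request is rejected.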
bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
                                            uint64_t High,
                                            bool AllowSaturation,
                                            SDValue &Imm) {
  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
    uint64_t ImmVal = CN->getZExtValue();

    // Reject shift amounts that are too small.
    if (ImmVal < Low)
      return false;

    // Reject or saturate shift amounts that are too big.
    if (ImmVal > High) {
      if (!AllowSaturation)
        return false;
      ImmVal = High;
    }

    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
  // tagp(FrameIndex, IRGstack, tag_offset):
  // since the offset between FrameIndex and IRGstack is a compile-time
  // constant, this can be lowered to a single ADDG instruction.
  if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
    return false;
  }

  SDValue IRG_SP = N->getOperand(2);
  if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
      cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
          Intrinsic::aarch64_irg_sp) {
    return false;
  }

  const TargetLowering *TLI = getTargetLowering();
  SDLoc DL(N);
  int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
  SDValue FiOp = CurDAG->getTargetFrameIndex(
      FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();

  SDNode *Out = CurDAG->getMachineNode(
      AArch64::TAGPstack, DL, MVT::i64,
      {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
  ReplaceNode(N, Out);
  return true;
}

void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
         "llvm.aarch64.tagp third argument must be an immediate");
  if (trySelectStackSlotTagP(N))
    return;
  // FIXME: above applies in any case when offset between Op1 and Op2 is a
  // compile-time constant, not just for stack allocations.

  // General case for unrelated pointers in Op1 and Op2.
  SDLoc DL(N);
  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
  SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
                                      {N->getOperand(1), N->getOperand(2)});
  SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
                                      {SDValue(N1, 0), N->getOperand(2)});
  SDNode *N3 = CurDAG->getMachineNode(
      AArch64::ADDG, DL, MVT::i64,
      {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
  ReplaceNode(N, N3);
}

// NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length
// vector types larger than NEON don't have a matching SubRegIndex.
static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
  assert(V.getValueType().isScalableVector() &&
         V.getValueType().getSizeInBits().getKnownMinSize() ==
             AArch64::SVEBitsPerBlock &&
         "Expected to extract from a packed scalable vector!");
  assert(VT.isFixedLengthVector() &&
         "Expected to extract a fixed length vector!");

  SDLoc DL(V);
  switch (VT.getSizeInBits()) {
  case 64: {
    auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
    return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
  }
  case 128: {
    auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
    return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
  }
  default: {
    auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
    return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
  }
  }
}

// NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length
// vector types larger than NEON don't have a matching SubRegIndex.
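// For example (illustrative only; v4i64 is an assumed fixed-length type such
// as one produced under -msve-vector-bits=256): a 64-bit value uses dsub and
// a 128-bit value uses zsub, but a 256-bit fixed-length vector has no
// matching sub-register index, so extractSubReg above and insertSubReg below
// fall back to COPY_TO_REGCLASS into the ZPR register class for that case.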
static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) { assert(VT.isScalableVector() && VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock && "Expected to insert into a packed scalable vector!"); assert(V.getValueType().isFixedLengthVector() && "Expected to insert a fixed length vector!"); SDLoc DL(V); switch (V.getValueType().getSizeInBits()) { case 64: { auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32); auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT, SDValue(Container, 0), V, SubReg); } case 128: { auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32); auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT, SDValue(Container, 0), V, SubReg); } default: { auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); } } } void AArch64DAGToDAGISel::Select(SDNode *Node) { // If we have a custom node, we already have selected! if (Node->isMachineOpcode()) { LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); Node->setNodeId(-1); return; } // Few custom selection stuff. EVT VT = Node->getValueType(0); switch (Node->getOpcode()) { default: break; case ISD::ATOMIC_CMP_SWAP: if (SelectCMP_SWAP(Node)) return; break; case ISD::READ_REGISTER: if (tryReadRegister(Node)) return; break; case ISD::WRITE_REGISTER: if (tryWriteRegister(Node)) return; break; case ISD::ADD: if (tryMLAV64LaneV128(Node)) return; break; case ISD::LOAD: { // Try to select as an indexed load. Fall through to normal processing // if we can't. if (tryIndexedLoad(Node)) return; break; } case ISD::SRL: case ISD::AND: case ISD::SRA: case ISD::SIGN_EXTEND_INREG: if (tryBitfieldExtractOp(Node)) return; if (tryBitfieldInsertInZeroOp(Node)) return; LLVM_FALLTHROUGH; case ISD::ROTR: case ISD::SHL: if (tryShiftAmountMod(Node)) return; break; case ISD::SIGN_EXTEND: if (tryBitfieldExtractOpFromSExt(Node)) return; break; case ISD::FP_EXTEND: if (tryHighFPExt(Node)) return; break; case ISD::OR: if (tryBitfieldInsertOp(Node)) return; break; case ISD::EXTRACT_SUBVECTOR: { // Bail when not a "cast" like extract_subvector. if (cast(Node->getOperand(1))->getZExtValue() != 0) break; // Bail when normal isel can do the job. EVT InVT = Node->getOperand(0).getValueType(); if (VT.isScalableVector() || InVT.isFixedLengthVector()) break; // NOTE: We can only get here when doing fixed length SVE code generation. // We do manual selection because the types involved are not linked to real // registers (despite being legal) and must be coerced into SVE registers. // // NOTE: If the above changes, be aware that selection will still not work // because the td definition of extract_vector does not support extracting // a fixed length vector from a scalable vector. ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0))); return; } case ISD::INSERT_SUBVECTOR: { // Bail when not a "cast" like insert_subvector. if (cast(Node->getOperand(2))->getZExtValue() != 0) break; if (!Node->getOperand(0).isUndef()) break; // Bail when normal isel should do the job. EVT InVT = Node->getOperand(1).getValueType(); if (VT.isFixedLengthVector() || InVT.isScalableVector()) break; // NOTE: We can only get here when doing fixed length SVE code generation. 
// We do manual selection because the types involved are not linked to real // registers (despite being legal) and must be coerced into SVE registers. // // NOTE: If the above changes, be aware that selection will still not work // because the td definition of insert_vector does not support inserting a // fixed length vector into a scalable vector. ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1))); return; } case ISD::Constant: { // Materialize zero constants as copies from WZR/XZR. This allows // the coalescer to propagate these into other instructions. ConstantSDNode *ConstNode = cast(Node); if (ConstNode->isNullValue()) { if (VT == MVT::i32) { SDValue New = CurDAG->getCopyFromReg( CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32); ReplaceNode(Node, New.getNode()); return; } else if (VT == MVT::i64) { SDValue New = CurDAG->getCopyFromReg( CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64); ReplaceNode(Node, New.getNode()); return; } } break; } case ISD::FrameIndex: { // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. int FI = cast(Node)->getIndex(); unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); const TargetLowering *TLI = getTargetLowering(); SDValue TFI = CurDAG->getTargetFrameIndex( FI, TLI->getPointerTy(CurDAG->getDataLayout())); SDLoc DL(Node); SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); return; } case ISD::INTRINSIC_W_CHAIN: { unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); switch (IntNo) { default: break; case Intrinsic::aarch64_ldaxp: case Intrinsic::aarch64_ldxp: { unsigned Op = IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; SDValue MemAddr = Node->getOperand(2); SDLoc DL(Node); SDValue Chain = Node->getOperand(0); SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, MVT::Other, MemAddr, Chain); // Transfer memoperands. MachineMemOperand *MemOp = cast(Node)->getMemOperand(); CurDAG->setNodeMemRefs(cast(Ld), {MemOp}); ReplaceNode(Node, Ld); return; } case Intrinsic::aarch64_stlxp: case Intrinsic::aarch64_stxp: { unsigned Op = IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; SDLoc DL(Node); SDValue Chain = Node->getOperand(0); SDValue ValLo = Node->getOperand(2); SDValue ValHi = Node->getOperand(3); SDValue MemAddr = Node->getOperand(4); // Place arguments in the right order. SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); // Transfer memoperands. 
MachineMemOperand *MemOp = cast(Node)->getMemOperand(); CurDAG->setNodeMemRefs(cast(St), {MemOp}); ReplaceNode(Node, St); return; } case Intrinsic::aarch64_neon_ld1x2: if (VT == MVT::v8i8) { SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); return; } break; case Intrinsic::aarch64_neon_ld1x3: if (VT == MVT::v8i8) { SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); return; } break; case Intrinsic::aarch64_neon_ld1x4: if (VT == MVT::v8i8) { SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); return; } break; case Intrinsic::aarch64_neon_ld2: if (VT == MVT::v8i8) { SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); return; } 
else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); return; } break; case Intrinsic::aarch64_neon_ld3: if (VT == MVT::v8i8) { SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); return; } break; case Intrinsic::aarch64_neon_ld4: if (VT == MVT::v8i8) { SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); return; } break; case Intrinsic::aarch64_neon_ld2r: if (VT == MVT::v8i8) { SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); return; } break; case Intrinsic::aarch64_neon_ld3r: if (VT == MVT::v8i8) { SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectLoad(Node, 3, 
AArch64::LD3Rv16b, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); return; } break; case Intrinsic::aarch64_neon_ld4r: if (VT == MVT::v8i8) { SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); return; } break; case Intrinsic::aarch64_neon_ld2lane: if (VT == MVT::v16i8 || VT == MVT::v8i8) { SelectLoadLane(Node, 2, AArch64::LD2i8); return; } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { SelectLoadLane(Node, 2, AArch64::LD2i16); return; } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) { SelectLoadLane(Node, 2, AArch64::LD2i32); return; } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) { SelectLoadLane(Node, 2, AArch64::LD2i64); return; } break; case Intrinsic::aarch64_neon_ld3lane: if (VT == MVT::v16i8 || VT == MVT::v8i8) { SelectLoadLane(Node, 3, AArch64::LD3i8); return; } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { SelectLoadLane(Node, 3, AArch64::LD3i16); return; } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) { SelectLoadLane(Node, 3, AArch64::LD3i32); return; } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) { SelectLoadLane(Node, 3, AArch64::LD3i64); return; } break; case Intrinsic::aarch64_neon_ld4lane: if (VT == MVT::v16i8 || VT == MVT::v8i8) { SelectLoadLane(Node, 4, AArch64::LD4i8); return; } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { SelectLoadLane(Node, 4, AArch64::LD4i16); return; } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) { SelectLoadLane(Node, 4, AArch64::LD4i32); return; } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) { SelectLoadLane(Node, 4, AArch64::LD4i64); return; } break; } } break; 
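        // The lane-indexed loads above are selected by element size only:
        // the 64-bit and 128-bit vector types (e.g. v8i8 and v16i8) share one
        // opcode per element width (LD2i8/LD2i16/LD2i32/LD2i64 and friends).
        // The underlying instructions always address a 128-bit register, so
        // the 64-bit cases are presumably widened inside SelectLoadLane,
        // which is defined elsewhere in this file.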
case ISD::INTRINSIC_WO_CHAIN: { unsigned IntNo = cast(Node->getOperand(0))->getZExtValue(); switch (IntNo) { default: break; case Intrinsic::aarch64_tagp: SelectTagP(Node); return; case Intrinsic::aarch64_neon_tbl2: SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two, false); return; case Intrinsic::aarch64_neon_tbl3: SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three : AArch64::TBLv16i8Three, false); return; case Intrinsic::aarch64_neon_tbl4: SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four : AArch64::TBLv16i8Four, false); return; case Intrinsic::aarch64_neon_tbx2: SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two, true); return; case Intrinsic::aarch64_neon_tbx3: SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three : AArch64::TBXv16i8Three, true); return; case Intrinsic::aarch64_neon_tbx4: SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four : AArch64::TBXv16i8Four, true); return; case Intrinsic::aarch64_neon_smull: case Intrinsic::aarch64_neon_umull: if (tryMULLV64LaneV128(IntNo, Node)) return; break; } break; } case ISD::INTRINSIC_VOID: { unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); if (Node->getNumOperands() >= 3) VT = Node->getOperand(2)->getValueType(0); switch (IntNo) { default: break; case Intrinsic::aarch64_neon_st1x2: { if (VT == MVT::v8i8) { SelectStore(Node, 2, AArch64::ST1Twov8b); return; } else if (VT == MVT::v16i8) { SelectStore(Node, 2, AArch64::ST1Twov16b); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectStore(Node, 2, AArch64::ST1Twov4h); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectStore(Node, 2, AArch64::ST1Twov8h); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectStore(Node, 2, AArch64::ST1Twov2s); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectStore(Node, 2, AArch64::ST1Twov4s); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectStore(Node, 2, AArch64::ST1Twov2d); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectStore(Node, 2, AArch64::ST1Twov1d); return; } break; } case Intrinsic::aarch64_neon_st1x3: { if (VT == MVT::v8i8) { SelectStore(Node, 3, AArch64::ST1Threev8b); return; } else if (VT == MVT::v16i8) { SelectStore(Node, 3, AArch64::ST1Threev16b); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectStore(Node, 3, AArch64::ST1Threev4h); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectStore(Node, 3, AArch64::ST1Threev8h); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectStore(Node, 3, AArch64::ST1Threev2s); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectStore(Node, 3, AArch64::ST1Threev4s); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectStore(Node, 3, AArch64::ST1Threev2d); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectStore(Node, 3, AArch64::ST1Threev1d); return; } break; } case Intrinsic::aarch64_neon_st1x4: { if (VT == MVT::v8i8) { SelectStore(Node, 4, AArch64::ST1Fourv8b); return; } else if (VT == MVT::v16i8) { SelectStore(Node, 4, AArch64::ST1Fourv16b); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectStore(Node, 4, AArch64::ST1Fourv4h); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectStore(Node, 4, AArch64::ST1Fourv8h); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 
SelectStore(Node, 4, AArch64::ST1Fourv2s); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectStore(Node, 4, AArch64::ST1Fourv4s); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectStore(Node, 4, AArch64::ST1Fourv2d); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectStore(Node, 4, AArch64::ST1Fourv1d); return; } break; } case Intrinsic::aarch64_neon_st2: { if (VT == MVT::v8i8) { SelectStore(Node, 2, AArch64::ST2Twov8b); return; } else if (VT == MVT::v16i8) { SelectStore(Node, 2, AArch64::ST2Twov16b); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectStore(Node, 2, AArch64::ST2Twov4h); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectStore(Node, 2, AArch64::ST2Twov8h); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectStore(Node, 2, AArch64::ST2Twov2s); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectStore(Node, 2, AArch64::ST2Twov4s); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectStore(Node, 2, AArch64::ST2Twov2d); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectStore(Node, 2, AArch64::ST1Twov1d); return; } break; } case Intrinsic::aarch64_neon_st3: { if (VT == MVT::v8i8) { SelectStore(Node, 3, AArch64::ST3Threev8b); return; } else if (VT == MVT::v16i8) { SelectStore(Node, 3, AArch64::ST3Threev16b); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectStore(Node, 3, AArch64::ST3Threev4h); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectStore(Node, 3, AArch64::ST3Threev8h); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectStore(Node, 3, AArch64::ST3Threev2s); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectStore(Node, 3, AArch64::ST3Threev4s); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectStore(Node, 3, AArch64::ST3Threev2d); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectStore(Node, 3, AArch64::ST1Threev1d); return; } break; } case Intrinsic::aarch64_neon_st4: { if (VT == MVT::v8i8) { SelectStore(Node, 4, AArch64::ST4Fourv8b); return; } else if (VT == MVT::v16i8) { SelectStore(Node, 4, AArch64::ST4Fourv16b); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectStore(Node, 4, AArch64::ST4Fourv4h); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectStore(Node, 4, AArch64::ST4Fourv8h); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectStore(Node, 4, AArch64::ST4Fourv2s); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectStore(Node, 4, AArch64::ST4Fourv4s); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectStore(Node, 4, AArch64::ST4Fourv2d); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectStore(Node, 4, AArch64::ST1Fourv1d); return; } break; } case Intrinsic::aarch64_neon_st2lane: { if (VT == MVT::v16i8 || VT == MVT::v8i8) { SelectStoreLane(Node, 2, AArch64::ST2i8); return; } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { SelectStoreLane(Node, 2, AArch64::ST2i16); return; } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) { SelectStoreLane(Node, 2, AArch64::ST2i32); return; } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) { SelectStoreLane(Node, 2, AArch64::ST2i64); return; } break; } 
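      // In the st2/st3/st4 cases above, the v1i64/v1f64 forms deliberately
      // fall back to the multi-register ST1 opcodes (ST1Twov1d, ST1Threev1d,
      // ST1Fourv1d): the NEON ST2/ST3/ST4 structure stores have no .1d
      // arrangement, so single-element 64-bit vectors are stored with the
      // equivalent consecutive ST1 instead.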
case Intrinsic::aarch64_neon_st3lane: { if (VT == MVT::v16i8 || VT == MVT::v8i8) { SelectStoreLane(Node, 3, AArch64::ST3i8); return; } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { SelectStoreLane(Node, 3, AArch64::ST3i16); return; } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) { SelectStoreLane(Node, 3, AArch64::ST3i32); return; } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) { SelectStoreLane(Node, 3, AArch64::ST3i64); return; } break; } case Intrinsic::aarch64_neon_st4lane: { if (VT == MVT::v16i8 || VT == MVT::v8i8) { SelectStoreLane(Node, 4, AArch64::ST4i8); return; } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { SelectStoreLane(Node, 4, AArch64::ST4i16); return; } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) { SelectStoreLane(Node, 4, AArch64::ST4i32); return; } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) { SelectStoreLane(Node, 4, AArch64::ST4i64); return; } break; } case Intrinsic::aarch64_sve_st2: { if (VT == MVT::nxv16i8) { SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM); return; } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM); return; } break; } case Intrinsic::aarch64_sve_st3: { if (VT == MVT::nxv16i8) { SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM); return; } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM); return; } break; } case Intrinsic::aarch64_sve_st4: { if (VT == MVT::nxv16i8) { SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM); return; } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM); return; } break; } } break; } case AArch64ISD::LD2post: { if (VT == MVT::v8i8) { SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); return; } else if (VT 
== MVT::v2i32 || VT == MVT::v2f32) { SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); return; } break; } case AArch64ISD::LD3post: { if (VT == MVT::v8i8) { SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); return; } break; } case AArch64ISD::LD4post: { if (VT == MVT::v8i8) { SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); return; } break; } case AArch64ISD::LD1x2post: { if (VT == MVT::v8i8) { SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 
SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); return; } break; } case AArch64ISD::LD1x3post: { if (VT == MVT::v8i8) { SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); return; } break; } case AArch64ISD::LD1x4post: { if (VT == MVT::v8i8) { SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); return; } break; } case AArch64ISD::LD1DUPpost: { if (VT == MVT::v8i8) { SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); return; } break; } case AArch64ISD::LD2DUPpost: { if (VT == MVT::v8i8) { SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, 
AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); return; } break; } case AArch64ISD::LD3DUPpost: { if (VT == MVT::v8i8) { SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); return; } break; } case AArch64ISD::LD4DUPpost: { if (VT == MVT::v8i8) { SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); return; } else if (VT == MVT::v16i8) { SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); return; } break; } case AArch64ISD::LD1LANEpost: { if (VT == MVT::v16i8 || VT == MVT::v8i8) { SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); return; } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); return; } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) { SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); return; } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) { SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); return; } break; } case AArch64ISD::LD2LANEpost: { if (VT == MVT::v16i8 || VT == MVT::v8i8) { SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); return; } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 
VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); return; } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) { SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); return; } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) { SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); return; } break; } case AArch64ISD::LD3LANEpost: { if (VT == MVT::v16i8 || VT == MVT::v8i8) { SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); return; } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); return; } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) { SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); return; } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) { SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); return; } break; } case AArch64ISD::LD4LANEpost: { if (VT == MVT::v16i8 || VT == MVT::v8i8) { SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); return; } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); return; } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) { SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); return; } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) { SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); return; } break; } case AArch64ISD::ST2post: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v8i8) { SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); return; } else if (VT == MVT::v16i8) { SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); return; } break; } case AArch64ISD::ST3post: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v8i8) { SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); return; } else if (VT == MVT::v16i8) { SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); return; } else if (VT == MVT::v1i64 || VT == 
MVT::v1f64) { SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); return; } break; } case AArch64ISD::ST4post: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v8i8) { SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); return; } else if (VT == MVT::v16i8) { SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); return; } break; } case AArch64ISD::ST1x2post: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v8i8) { SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); return; } else if (VT == MVT::v16i8) { SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); return; } break; } case AArch64ISD::ST1x3post: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v8i8) { SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); return; } else if (VT == MVT::v16i8) { SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) { SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST); return; } break; } case AArch64ISD::ST1x4post: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v8i8) { SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); return; } else if (VT == MVT::v16i8) { SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST); return; } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST); return; } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST); return; } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 
SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST); return; } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST); return; } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); return; } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST); return; } break; } case AArch64ISD::ST2LANEpost: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v16i8 || VT == MVT::v8i8) { SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST); return; } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST); return; } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) { SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST); return; } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) { SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST); return; } break; } case AArch64ISD::ST3LANEpost: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v16i8 || VT == MVT::v8i8) { SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST); return; } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST); return; } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) { SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST); return; } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) { SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST); return; } break; } case AArch64ISD::ST4LANEpost: { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v16i8 || VT == MVT::v8i8) { SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST); return; } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST); return; } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) { SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST); return; } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || VT == MVT::v1f64) { SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST); return; } break; } case AArch64ISD::SVE_LD2_MERGE_ZERO: { if (VT == MVT::nxv16i8) { SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W); return; } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D); return; } break; } case AArch64ISD::SVE_LD3_MERGE_ZERO: { if (VT == MVT::nxv16i8) { SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W); return; } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 
      SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD4_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
      SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
      return;
    }
    break;
  }
  }

  // Select the default instruction
  SelectCode(Node);
}

/// createAArch64ISelDag - This pass converts a legalized DAG into a
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}

/// When \p PredVT is a scalable vector predicate in the form
/// MVT::nx<M>xi1, it builds the correspondent scalable vector of
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
/// structured vectors (NumVec > 1), the output data type is
/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
/// EVT.
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
                                                unsigned NumVec) {
  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
    return EVT();

  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
    return EVT();

  ElementCount EC = PredVT.getVectorElementCount();
  EVT ScalarVT =
      EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);

  return MemVT;
}

/// Return the EVT of the data associated to a memory operation in \p
/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
  if (isa<MemSDNode>(Root))
    return cast<MemSDNode>(Root)->getMemoryVT();

  if (isa<MemIntrinsicSDNode>(Root))
    return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();

  const unsigned Opcode = Root->getOpcode();
  // For custom ISD nodes, we have to look at them individually to extract the
  // type of the data moved to/from memory.
  switch (Opcode) {
  case AArch64ISD::LD1_MERGE_ZERO:
  case AArch64ISD::LD1S_MERGE_ZERO:
  case AArch64ISD::LDNF1_MERGE_ZERO:
  case AArch64ISD::LDNF1S_MERGE_ZERO:
    return cast<VTSDNode>(Root->getOperand(3))->getVT();
  case AArch64ISD::ST1_PRED:
    return cast<VTSDNode>(Root->getOperand(4))->getVT();
  case AArch64ISD::SVE_LD2_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
  case AArch64ISD::SVE_LD3_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
  case AArch64ISD::SVE_LD4_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
  default:
    break;
  }

  if (Opcode != ISD::INTRINSIC_VOID)
    return EVT();

  const unsigned IntNo =
      cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue();
  if (IntNo != Intrinsic::aarch64_sve_prf)
    return EVT();

  // We are using an SVE prefetch intrinsic. Type must be inferred
  // from the width of the predicate.
  return getPackedVectorTypeFromPredicateType(
      Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
}

/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
/// where Root is the memory access using N for its address.
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);

  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  SDValue VScale = N.getOperand(1);
  if (VScale.getOpcode() != ISD::VSCALE)
    return false;

  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinSize()) / 8;
  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();

  if ((MulImm % MemWidthBytes) != 0)
    return false;

  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(0);
  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
  return true;
}

/// Select register plus register addressing mode for SVE, with scaled
/// offset.
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
                                                  SDValue &Base,
                                                  SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Process an ADD node.
  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  // 8 bit data does not come with the SHL node, so it is treated
  // separately.
  if (Scale == 0) {
    Base = LHS;
    Offset = RHS;
    return true;
  }

  // Check if the RHS is a shift node with a constant.
  if (RHS.getOpcode() != ISD::SHL)
    return false;

  const SDValue ShiftRHS = RHS.getOperand(1);
  if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
    if (C->getZExtValue() == Scale) {
      Base = LHS;
      Offset = RHS.getOperand(0);
      return true;
    }

  return false;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index cb9500c16f38..53ae3370c217 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1,7080 +1,7001 @@
//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------